From bd475d8f017f0fc76a8f218a6fe8b53b9cf29509 Mon Sep 17 00:00:00 2001 From: Guillem Hernandez Sola Date: Tue, 21 Apr 2026 17:34:10 +0200 Subject: [PATCH] Added all --- manga-translator.py | 110 +++++++++++++------------------------------- 1 file changed, 32 insertions(+), 78 deletions(-) diff --git a/manga-translator.py b/manga-translator.py index ce832f2..3997112 100644 --- a/manga-translator.py +++ b/manga-translator.py @@ -104,14 +104,21 @@ def looks_like_box_tag(t: str) -> bool: def is_noise_text(text: str) -> bool: t = (text or "").strip() - if re.fullmatch(r"[\?\!]+", t): + # ALLOW pure punctuation clusters like "...", "!!", "?!" + if re.fullmatch(r"[\?\!\.]+", t): + return False + + # ALLOW single alphabetical characters (crucial for vertical text) + if len(t) == 1 and t.isalpha(): return False if any(re.fullmatch(p, t) for p in NOISE_PATTERNS): return True if looks_like_box_tag(t): return True - if len(t) <= 2 and not re.search(r"[A-Z0-9\?\!]", t): + + # Relaxed the length check to allow 1-2 letter words and punctuation + if len(t) <= 2 and not re.search(r"[A-Z0-9\?\!\.]", t) and not t.isalpha(): return True symbol_ratio = sum(1 for c in t if not c.isalnum() and not c.isspace()) / max(1, len(t)) @@ -193,27 +200,10 @@ def ocr_candidate_score(text: str) -> float: # SPLITTERS # ============================================================ def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None): - """ - Checks if a bounding box crosses a solid vertical panel border. - - This detects vertical lines/borders within the bubble that might indicate - the bubble should be split into left/right columns. - - Only splits if: - - Bubble has 10+ quads (real multi-column layouts have many quads, while nested bubbles have fewer) - - Height is sufficient (50+ pixels) - - Nested bubbles typically have 1-6 quads, while true multi-column bubbles have 8+. - - Returns: - Tuple (box_left, box_right, split_x_absolute) if split found, else None - """ x1, y1, x2, y2 = bbox_xyxy w = x2 - x1 h = y2 - y1 - # Prevent false splits: require substantial number of quads - # Nested bubbles have 1-6 quads, true multi-column layouts have 8+ if bubble_quads is not None and len(bubble_quads) < 10: return None @@ -238,31 +228,23 @@ def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None): peak_x_relative = np.argmax(vertical_projection[search_start:search_end]) + search_start peak_val = vertical_projection[peak_x_relative] - # Find ALL significant peaks, not just the first - # This helps detect internal gaps between text regions (left vs right bubbles) - threshold_val = h * 255 * 0.25 # Very sensitive threshold + threshold_val = h * 255 * 0.25 significant_peaks = [] for x_rel in range(search_start, search_end): if vertical_projection[x_rel] > threshold_val: significant_peaks.append((x_rel, vertical_projection[x_rel])) - # If we have multiple peaks, find the largest gap between text regions - # by looking for the valley (lowest projection value) between peaks if len(significant_peaks) > 1: - # Find the minimum value (gap) between the first and last peak min_proj_val = np.min(vertical_projection[search_start:search_end]) min_proj_idx = np.argmin(vertical_projection[search_start:search_end]) + search_start - # Use the valley point as the split, not the peak - # This more accurately separates left-aligned vs right-aligned content - if min_proj_val < threshold_val * 0.6: # Valley is clearly a gap + if min_proj_val < threshold_val * 0.6: split_x_absolute = x1 + min_proj_idx box_left = (x1, y1, split_x_absolute, y2) box_right = (split_x_absolute, y1, x2, y2) return box_left, box_right, split_x_absolute - # Fallback: if the main peak is significant enough, use it if peak_val > (h * 255 * 0.40): split_x_absolute = x1 + peak_x_relative box_left = (x1, y1, split_x_absolute, y2) @@ -273,23 +255,12 @@ def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None): def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thresholds=False): - """ - Checks if the text inside a bubble is actually two separate columns of text - by looking for a clean vertical gap between the bounding boxes. - - Improved to detect large column gaps (e.g., 100+ pixels between text regions). - - Args: - use_aggressive_thresholds: If True, use lower thresholds (25px, 1.5x line height) - for fallback splitting after failed panel border detection - """ if len(indices) < 2: return None boxes = [quad_bbox(ocr[i][0]) for i in indices] sorted_items = sorted(zip(indices, boxes), key=lambda x: x[1][0]) - # Calculate gaps between consecutive elements sorted by x-coordinate gaps = [] current_max_x = sorted_items[0][1][2] @@ -303,33 +274,25 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre if not gaps: return None - # Find the largest gap max_gap_idx, max_gap_size, _, _ = max(gaps, key=lambda x: x[1]) - # Calculate thresholds based on line height hs = [b[3] - b[1] for b in boxes] med_h = float(np.median(hs)) if hs else 15.0 if use_aggressive_thresholds: - # Relaxed thresholds for fallback splitting after failed panel border - threshold1 = 80.0 # Increased from 10 to require significant gaps - threshold2 = med_h * 1.2 # Increased from 0.8 - min_gap = 40.0 # Increased from 5 + threshold1 = 80.0 + threshold2 = med_h * 1.2 + min_gap = 40.0 else: - # Normal thresholds - very conservative to avoid breaking valid bubbles - threshold1 = 120.0 # Increased from 50 - require very large gaps - threshold2 = med_h * 3.0 # Increased from 2.0 - require 3x line height - min_gap = 60.0 # Increased from 25 - - + threshold1 = 120.0 + threshold2 = med_h * 3.0 + min_gap = 60.0 if max_gap_size > threshold1 or (max_gap_size > threshold2 and max_gap_size > min_gap): split_idx = max_gap_idx left_indices = [item[0] for item in sorted_items[:split_idx]] right_indices = [item[0] for item in sorted_items[split_idx:]] - # Additional safety: don't split if one side only has 1 detection - # This prevents breaking up valid bubbles with just a few words if len(left_indices) < 2 or len(right_indices) < 2: return None @@ -667,9 +630,7 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3): med_h = float(np.median(hs)) if hs else 12.0 dist_thresh = max(20.0, med_h * 2.2) - # Adaptive vertical gap: allow up to 1.0x median line height for vertical gaps - # This handles cases where lines are spaced further apart (e.g., multi-line bubbles) - adaptive_gap_y = max(gap_px, med_h * 1.0) + adaptive_gap_y = max(gap_px, med_h * 2.5) p = list(range(n)) @@ -684,17 +645,23 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3): for i in range(n): for j in range(i + 1, n): - # Use adaptive gap for vertical spacing, fixed gap for horizontal ax1, ay1, ax2, ay2 = boxes[i] bx1, by1, bx2, by2 = boxes[j] gap_x = max(0, max(ax1, bx1) - min(ax2, bx2)) gap_y = max(0, max(ay1, by1) - min(ay2, by2)) + cx1, cy1 = centers[i] + cx2, cy2 = centers[j] + is_vertically_aligned = abs(cx1 - cx2) < (med_h * 1.5) + + if is_vertically_aligned and gap_y <= (med_h * 4.0): + unite(i, j) + continue + if gap_x <= gap_px and gap_y <= adaptive_gap_y: unite(i, j) continue - cx1, cy1 = centers[i] - cx2, cy2 = centers[j] + d = ((cx1 - cx2) ** 2 + (cy1 - cy2) ** 2) ** 0.5 if d <= dist_thresh and abs(cy1 - cy2) <= med_h * 3.0: unite(i, j) @@ -721,8 +688,6 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3): x1, y1, x2, y2 = ub - # Adaptive padding: scale with median line height to ensure all boundary chars are captured - # Use max of fixed padding or line-height-based padding adaptive_pad = max(bbox_padding, int(round(med_h * 0.35))) x1 = max(0, x1 - adaptive_pad); y1 = max(0, y1 - adaptive_pad) x2 = min(iw - 1, x2 + adaptive_pad); y2 = min(ih - 1, y2 + adaptive_pad) @@ -856,7 +821,7 @@ def translate_manga_text( image_path="001-page.png", source_lang="en", target_lang="ca", - confidence_threshold=0.12, + confidence_threshold=0.05, min_text_length=1, gap_px="auto", filter_sound_effects=True, @@ -913,16 +878,14 @@ def translate_manga_text( filtered, image.shape, gap_px=resolved_gap, bbox_padding=3 ) - # ── NEW: SPLIT MULTI-PANEL & MULTI-COLUMN BUBBLES ── new_bubbles, new_bubble_boxes, new_bubble_quads, new_bubble_indices = {}, {}, {}, {} next_bid = max(bubbles.keys()) + 1 if bubbles else 1 splits_performed = [] for bid in list(bubbles.keys()): box = bubble_boxes[bid] - bubble_split = None # Will hold (left_idxs, right_idxs) if a split is detected + bubble_split = None - # 1. Try Image-based Panel Border Split split_result = split_panel_box(image, box, bubble_quads=bubble_quads[bid]) if split_result: box_left, box_right, split_x = split_result @@ -939,7 +902,6 @@ def translate_manga_text( bubble_split = (left_idxs, right_idxs) splits_performed.append(f"BOX#{bid} (panel border at x={split_x})") elif len(bubble_quads[bid]) >= 10: - # Panel border split failed (all quads on one side). Try text column split for large bubbles col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid, use_aggressive_thresholds=True) if col_split: left_idxs, right_idxs = col_split @@ -947,7 +909,6 @@ def translate_manga_text( bubble_split = (left_idxs, right_idxs) splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)") - # 2. If no panel border split, try text column split if bubble_split is None: col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid) if col_split: @@ -956,17 +917,14 @@ def translate_manga_text( bubble_split = (left_idxs, right_idxs) splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)") - # 3. Create split bubbles if detected if bubble_split: left_idxs, right_idxs = bubble_split - # Create Left Bubble new_bubbles[bid] = build_lines_from_indices(left_idxs, filtered) ub_left = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in left_idxs]) new_bubble_boxes[bid] = (max(0, ub_left[0]-3), max(0, ub_left[1]-3), min(iw-1, ub_left[2]+3), min(ih-1, ub_left[3]+3)) new_bubble_quads[bid] = [filtered[i][0] for i in left_idxs] new_bubble_indices[bid] = left_idxs - # Create Right Bubble new_bubbles[next_bid] = build_lines_from_indices(right_idxs, filtered) ub_right = boxes_union_xyxy([quad_bbox(filtered[i][0]) for i in right_idxs]) new_bubble_boxes[next_bid] = (max(0, ub_right[0]-3), max(0, ub_right[1]-3), min(iw-1, ub_right[2]+3), min(ih-1, ub_right[3]+3)) @@ -974,24 +932,20 @@ def translate_manga_text( new_bubble_indices[next_bid] = right_idxs next_bid += 1 else: - # No split needed, keep original new_bubbles[bid] = bubbles[bid] new_bubble_boxes[bid] = bubble_boxes[bid] new_bubble_quads[bid] = bubble_quads[bid] new_bubble_indices[bid] = bubble_indices[bid] - # Print split summary if splits_performed: print(f"\nšŸ”€ Multi-column bubble splits detected: {len(splits_performed)}") for split_info in splits_performed: print(f" āœ“ Split {split_info}") - # Overwrite old dictionaries with the newly split ones bubbles = new_bubbles bubble_boxes = new_bubble_boxes bubble_quads = new_bubble_quads bubble_indices = new_bubble_indices - # ─────────────────────────────────────────────────── translator = GoogleTranslator(source=source_lang, target=target_lang) @@ -1098,8 +1052,8 @@ if __name__ == "__main__": image_path="004.png", source_lang="en", target_lang="ca", - confidence_threshold=0.12, - min_text_length=2, + confidence_threshold=0.05, + min_text_length=1, gap_px="auto", filter_sound_effects=True, quality_threshold=0.62, @@ -1107,4 +1061,4 @@ if __name__ == "__main__": export_bubbles_to="bubbles.json", reading_mode="ltr", debug=True - ) \ No newline at end of file + )