From f00647e668476bae98aba77bf96d5a0119e86908 Mon Sep 17 00:00:00 2001 From: Guillem Hernandez Sola Date: Tue, 21 Apr 2026 21:45:46 +0200 Subject: [PATCH] Added new styles --- manga-translator.py | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/manga-translator.py b/manga-translator.py index 5939f6b..4b49525 100644 --- a/manga-translator.py +++ b/manga-translator.py @@ -286,6 +286,7 @@ def split_wide_ocr_items(image_bgr, filtered_ocr): return new_filtered, splits_made + def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None): x1, y1, x2, y2 = bbox_xyxy w = x2 - x1 @@ -367,20 +368,20 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre med_h = float(np.median(hs)) if hs else 15.0 if use_aggressive_thresholds: - threshold1 = 80.0 - threshold2 = med_h * 1.2 - min_gap = 40.0 + threshold1 = 60.0 + threshold2 = med_h * 1.0 + min_gap = 20.0 else: - threshold1 = 120.0 - threshold2 = med_h * 3.0 - min_gap = 60.0 + threshold1 = 90.0 + threshold2 = med_h * 1.5 + min_gap = 25.0 if max_gap_size > threshold1 or (max_gap_size > threshold2 and max_gap_size > min_gap): split_idx = max_gap_idx left_indices = [item[0] for item in sorted_items[:split_idx]] right_indices = [item[0] for item in sorted_items[split_idx:]] - if len(left_indices) < 2 or len(right_indices) < 2: + if len(left_indices) < 1 or len(right_indices) < 1: return None return left_indices, right_indices @@ -413,8 +414,8 @@ def split_bubble_if_multiple_rows(indices, ocr, bid=None): hs = [b[3] - b[1] for b in boxes] med_h = float(np.median(hs)) if hs else 15.0 - threshold = med_h * 2.5 - min_gap = 40.0 + threshold = med_h * 1.8 + min_gap = 20.0 if max_gap_size > threshold and max_gap_size > min_gap: split_idx = max_gap_idx @@ -767,7 +768,7 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3): centers = [quad_center(r[0]) for r in ocr] hs = [max(1.0, b[3] - b[1]) for b in boxes] med_h = float(np.median(hs)) if hs else 12.0 - dist_thresh = max(20.0, med_h * 2.2) + dist_thresh = max(20.0, med_h * 1.8) adaptive_gap_y = max(gap_px, med_h * 2.5) @@ -793,7 +794,11 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3): cx2, cy2 = centers[j] is_vertically_aligned = abs(cx1 - cx2) < (med_h * 1.5) - if is_vertically_aligned and gap_y <= (med_h * 4.0): + if gap_x == 0 and gap_y <= (med_h * 3.5): + unite(i, j) + continue + + if is_vertically_aligned and gap_y <= (med_h * 3.5): unite(i, j) continue @@ -802,7 +807,7 @@ def group_tokens(ocr, image_shape, gap_px=18, bbox_padding=3): continue d = ((cx1 - cx2) ** 2 + (cy1 - cy2) ** 2) ** 0.5 - if d <= dist_thresh and abs(cy1 - cy2) <= med_h * 3.0: + if d <= dist_thresh and abs(cy1 - cy2) <= med_h * 1.5: unite(i, j) groups = {} @@ -1013,7 +1018,6 @@ def translate_manga_text( print("⚠️ No text after filtering.") return - # --- NEW: Split wide OCR items across column gaps --- filtered, splits_made = split_wide_ocr_items(image, filtered) if splits_made > 0: print(f"✂️ Split {splits_made} wide OCR lines across column gaps.") @@ -1030,7 +1034,6 @@ def translate_manga_text( box = bubble_boxes[bid] bubble_split = None - # 1. Panel border split split_result = split_panel_box(image, box, bubble_quads=bubble_quads[bid]) if split_result: box_left, box_right, split_x = split_result @@ -1054,7 +1057,6 @@ def translate_manga_text( bubble_split = (left_idxs, right_idxs) splits_performed.append(f"BOX#{bid} ({len(left_idxs)} quads | {len(right_idxs)} quads)") - # 2. Check for vertical columns (left/right split) if bubble_split is None: col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid) if col_split: @@ -1063,7 +1065,6 @@ def translate_manga_text( bubble_split = (left_idxs, right_idxs) splits_performed.append(f"BOX#{bid} (Vertical Column Split: {len(left_idxs)} | {len(right_idxs)} quads)") - # 3. Check for horizontal rows (top/bottom split) if bubble_split is None: row_split = split_bubble_if_multiple_rows(bubble_indices[bid], filtered, bid=bid) if row_split: @@ -1204,7 +1205,7 @@ def translate_manga_text( if __name__ == "__main__": translate_manga_text( - image_path="09.jpg", + image_path="004.png", source_lang="english", target_lang="ca", confidence_threshold=0.05,