From a5c81f4ff070f849c92b945d8e1112c0d2300540 Mon Sep 17 00:00:00 2001
From: Guillem Hernandez Sola
Date: Tue, 21 Apr 2026 21:27:22 +0200
Subject: [PATCH] Added new styles

---
 NOTE(review): line breaks and indentation in this patch were reconstructed
 from a whitespace-collapsed copy. If context lines do not match the target
 file exactly, apply with `git apply --ignore-whitespace`.

 manga-translator.py | 130 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 118 insertions(+), 12 deletions(-)

diff --git a/manga-translator.py b/manga-translator.py
index 7916853..5939f6b 100644
--- a/manga-translator.py
+++ b/manga-translator.py
@@ -104,11 +104,9 @@ def looks_like_box_tag(t: str) -> bool:
 def is_noise_text(text: str) -> bool:
     t = (text or "").strip()
 
-    # ALLOW pure punctuation clusters like "...", "!!", "?!"
     if re.fullmatch(r"[\?\!\.]+", t):
         return False
 
-    # ALLOW single alphabetical characters (crucial for vertical text)
     if len(t) == 1 and t.isalpha():
         return False
 
@@ -117,7 +115,6 @@ def is_noise_text(text: str) -> bool:
     if looks_like_box_tag(t):
         return True
 
-    # Relaxed the length check to allow 1-2 letter words and punctuation
     if len(t) <= 2 and not re.search(r"[A-Z0-9\?\!\.]", t) and not t.isalpha():
         return True
 
@@ -199,12 +196,119 @@ def ocr_candidate_score(text: str) -> float:
 # ============================================================
 # SPLITTERS
 # ============================================================
+def split_wide_ocr_items(image_bgr, filtered_ocr):
+    """
+    Split OCR lines that straddle a wide, empty vertical gutter.
+
+    Apple Vision sometimes merges two neighboring columns into one wide line.
+    For each wide, multi-word item the crop is binarized (inverted Otsu), the
+    ink is summed per pixel column, and the widest low-ink run inside the
+    middle 60% of the crop is located. The item is split only when that run
+    is significantly wider than a normal space between words.
+
+    Args:
+        image_bgr: Image array indexed [row, column, channel]; it is converted
+            with cv2.COLOR_BGR2GRAY.
+        filtered_ocr: Iterable of (quad, text, conf) tuples.
+
+    Returns:
+        Tuple (new_filtered, splits_made): the items with every split item
+        replaced by its left and right halves, and the number of splits made.
+    """
+    new_filtered = []
+    splits_made = 0
+
+    for item in filtered_ocr:
+        quad, text, conf = item
+        x1, y1, x2, y2 = quad_bbox(quad)
+        w = x2 - x1
+        h = max(1, y2 - y1)
+
+        # Check if it's abnormally wide
+        if w > h * 2.5 and len(text) > 5 and ' ' in text:
+            pad = 2
+            roi_y1 = max(0, y1 - pad)
+            roi_y2 = min(image_bgr.shape[0], y2 + pad)
+            roi_x1 = max(0, x1)
+            roi_x2 = min(image_bgr.shape[1], x2)
+
+            roi = image_bgr[roi_y1:roi_y2, roi_x1:roi_x2]
+            if roi.size > 0:
+                gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
+                _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+                proj = np.sum(thresh, axis=0)
+
+                # gap_mask is indexed by ROI column, so the scan window must
+                # come from the ROI width; it is smaller than w whenever the
+                # box was clipped at the image border.
+                roi_w = proj.shape[0]
+                start_x = int(roi_w * 0.20)
+                end_x = int(roi_w * 0.80)
+
+                if start_x < end_x:
+                    # Calculate expected character width
+                    char_w = w / max(1, len(text))
+                    # A real column gap should be at least 2.5 chars wide or 75% of line height
+                    min_gap_width = max(int(char_w * 2.5), int(h * 0.75))
+
+                    gap_threshold = h * 255 * 0.15
+                    gap_mask = proj < gap_threshold
+
+                    # Find the widest continuous gap
+                    best_gap_start = -1
+                    best_gap_len = 0
+                    current_gap_start = -1
+                    current_gap_len = 0
+
+                    for x_rel in range(start_x, end_x):
+                        if gap_mask[x_rel]:
+                            if current_gap_len == 0:
+                                current_gap_start = x_rel
+                            current_gap_len += 1
+                        else:
+                            if current_gap_len > best_gap_len:
+                                best_gap_len = current_gap_len
+                                best_gap_start = current_gap_start
+                            current_gap_len = 0
+
+                    if current_gap_len > best_gap_len:
+                        best_gap_len = current_gap_len
+                        best_gap_start = current_gap_start
+
+                    # ONLY split if the gap is wide enough to be a gutter between bubbles
+                    if best_gap_len >= min_gap_width:
+                        split_x = roi_x1 + best_gap_start + (best_gap_len // 2)
+
+                        split_idx = int((split_x - x1) / char_w)
+
+                        spaces = [i for i, c in enumerate(text) if c == ' ']
+                        if spaces:
+                            best_space = min(spaces, key=lambda i: abs(i - split_idx))
+                            if abs(best_space - split_idx) < len(text) * 0.35:
+                                split_idx = best_space
+
+                        text_left = text[:split_idx].strip()
+                        text_right = text[split_idx:].strip()
+
+                        if text_left and text_right:
+                            quad_left = [[x1, y1], [split_x, y1], [split_x, y2], [x1, y2]]
+                            quad_right = [[split_x, y1], [x2, y1], [x2, y2], [split_x, y2]]
+                            new_filtered.append((quad_left, text_left, conf))
+                            new_filtered.append((quad_right, text_right, conf))
+                            splits_made += 1
+                            continue
+
+        # If no split was made, keep the original item
+        new_filtered.append(item)
+
+    return new_filtered, splits_made
+
 def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None):
     x1, y1, x2, y2 = bbox_xyxy
     w = x2 - x1
     h = y2 - y1
 
-    if bubble_quads is not None and len(bubble_quads) < 10:
+    if bubble_quads is not None and len(bubble_quads) < 4:
         return None
 
     if w < 50 or h < 50:
@@ -345,10 +449,8 @@ def split_bubble_if_multiple_rows(indices, ocr, bid=None):
 # ============================================================
 class MacVisionDetector:
     def __init__(self, source_lang="en"):
-        # 1. Normalize the input language string
         lang_key = source_lang.lower().strip()
 
-        # 2. Comprehensive mapping to Apple Vision BCP-47 language codes
         lang_map = {
             "en": "en-US", "english": "en-US",
             "es": "es-ES", "spanish": "es-ES",
@@ -358,10 +460,9 @@ class MacVisionDetector:
             "it": "it-IT", "italian": "it-IT",
             "de": "de-DE", "german": "de-DE",
             "ko": "ko-KR", "korean": "ko-KR",
-            "zh": "ko-KR", "chinese": "zh-Hans" # Simplified Chinese
+            "zh": "zh-Hans", "chinese": "zh-Hans"
         }
 
-        # 3. Resolve the language code
         apple_lang = lang_map.get(lang_key, "en-US")
         self.langs = [apple_lang]
         print(f"⚡ Using Apple Vision OCR (Language: {self.langs[0]})")
@@ -929,6 +1030,11 @@ def translate_manga_text(
         print("⚠️ No text after filtering.")
         return
 
+    # --- NEW: Split wide OCR items across column gaps ---
+    filtered, splits_made = split_wide_ocr_items(image, filtered)
+    if splits_made > 0:
+        print(f"✂️ Split {splits_made} wide OCR lines across column gaps.")
+
     bubbles, bubble_boxes, bubble_quads, bubble_indices = group_tokens(
         filtered, image.shape, gap_px=resolved_gap, bbox_padding=3
     )
@@ -957,7 +1063,7 @@ def translate_manga_text(
             if left_idxs and right_idxs:
                 bubble_split = (left_idxs, right_idxs)
                 splits_performed.append(f"BOX#{bid} (panel border at x={split_x})")
-        elif len(bubble_quads[bid]) >= 10:
+        elif len(bubble_quads[bid]) >= 4:
             col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid, use_aggressive_thresholds=True)
             if col_split:
                 left_idxs, right_idxs = col_split
@@ -1115,8 +1221,8 @@ def translate_manga_text(
 
 if __name__ == "__main__":
     translate_manga_text(
-        image_path="003.jpg",
-        source_lang="es",
+        image_path="09.jpg",
+        source_lang="english",
         target_lang="ca",
         confidence_threshold=0.05,
         min_text_length=1,
@@ -1125,6 +1231,6 @@ if __name__ == "__main__":
         quality_threshold=0.62,
         export_to_file="output.txt",
         export_bubbles_to="bubbles.json",
-        reading_mode="rtl", # Changed to RTL for Japanese Manga
+        reading_mode="rtl",
         debug=True
     )
\ No newline at end of file