diff --git a/.gitignore b/.gitignore
index de18566..646c941 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,6 +23,7 @@ Icon
 *.jpg
 *.jpeg
 *.json
+*.webp
 
 # Files that might appear in the root of a volume
 .DocumentRevisions-V100
diff --git a/manga-translator.py b/manga-translator.py
index 66cb56c..7916853 100644
--- a/manga-translator.py
+++ b/manga-translator.py
@@ -302,15 +302,14 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre
 
 
 def split_bubble_if_multiple_rows(indices, ocr, bid=None):
     """
     Splits a bubble horizontally (top/bottom) if there is a massive vertical gap
     between text lines, indicating two separate bubbles were merged.
     """
     if len(indices) < 2:
         return None
 
     boxes = [quad_bbox(ocr[i][0]) for i in indices]
-    # Sort by Y-coordinate (top to bottom)
     sorted_items = sorted(zip(indices, boxes), key=lambda x: x[1][1])
 
     gaps = []
@@ -326,23 +325,19 @@ def split_bubble_if_multiple_rows(indices, ocr, bid=None):
     if not gaps:
         return None
 
-    # Find the largest vertical gap
     max_gap_idx, max_gap_size, _, _ = max(gaps, key=lambda x: x[1])
 
-    # Calculate median line height to define what a "large" gap is
     hs = [b[3] - b[1] for b in boxes]
     med_h = float(np.median(hs)) if hs else 15.0
 
-    # If the vertical gap is more than 2.5x the height of a text line, it's a split!
     threshold = med_h * 2.5
-    min_gap = 40.0 # Absolute minimum pixel gap to prevent micro-splits
+    min_gap = 40.0
 
     if max_gap_size > threshold and max_gap_size > min_gap:
         split_idx = max_gap_idx
         top_indices = [item[0] for item in sorted_items[:split_idx]]
         bottom_indices = [item[0] for item in sorted_items[split_idx:]]
 
-        # Ensure we don't just split off a single noise character
         if len(top_indices) >= 1 and len(bottom_indices) >= 1:
             return top_indices, bottom_indices
 
@@ -354,10 +349,27 @@ def split_bubble_if_multiple_rows(indices, ocr, bid=None):
 # ============================================================
 class MacVisionDetector:
     def __init__(self, source_lang="en"):
-        lang_map = {"en": "en-US", "es": "es-ES", "ca": "ca-ES", "fr": "fr-FR", "ja": "ja-JP"}
-        apple_lang = lang_map.get(source_lang, "en-US")
+        """Select the Apple Vision OCR language for source_lang (unknown values fall back to en-US)."""
+        # 1. Normalize the input language string (None is treated as unknown)
+        lang_key = (source_lang or "").lower().strip()
+
+        # 2. Comprehensive mapping to Apple Vision BCP-47 language codes
+        lang_map = {
+            "en": "en-US", "english": "en-US",
+            "es": "es-ES", "spanish": "es-ES",
+            "ca": "ca-ES", "catalan": "ca-ES",
+            "fr": "fr-FR", "french": "fr-FR",
+            "ja": "ja-JP", "japanese": "ja-JP",
+            "it": "it-IT", "italian": "it-IT",
+            "de": "de-DE", "german": "de-DE",
+            "ko": "ko-KR", "korean": "ko-KR",
+            "zh": "zh-Hans", "chinese": "zh-Hans"  # Simplified Chinese
+        }
+
+        # 3. Resolve the language code
+        apple_lang = lang_map.get(lang_key, "en-US")
         self.langs = [apple_lang]
-        print(f"⚡ Using Apple Vision OCR (Language: {self.langs})")
+        print(f"⚡ Using Apple Vision OCR (Language: {self.langs[0]})")
 
     def read(self, image_path_or_array):
         if isinstance(image_path_or_array, str):
@@ -1108,8 +1120,8 @@ def translate_manga_text(
 
 if __name__ == "__main__":
     translate_manga_text(
-        image_path="004.png",
-        source_lang="en",
+        image_path="003.jpg",
+        source_lang="es",
         target_lang="ca",
         confidence_threshold=0.05,
         min_text_length=1,
@@ -1118,6 +1130,6 @@ if __name__ == "__main__":
         quality_threshold=0.62,
         export_to_file="output.txt",
         export_bubbles_to="bubbles.json",
-        reading_mode="ltr",
+        reading_mode="rtl",  # right-to-left reading order (Japanese manga layout)
         debug=True
     )