Added all

This commit is contained in:
Guillem Hernandez Sola
2026-04-21 21:03:35 +02:00
parent ba5f001e75
commit f56ee49abf
2 changed files with 25 additions and 17 deletions

1
.gitignore vendored
View File

@@ -23,6 +23,7 @@ Icon
*.jpg
*.jpeg
*.json
*.webp
# Files that might appear in the root of a volume
.DocumentRevisions-V100

View File

@@ -302,15 +302,10 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre
def split_bubble_if_multiple_rows(indices, ocr, bid=None):
"""
Splits a bubble horizontally (top/bottom) if there is a massive vertical gap
between text lines, indicating two separate bubbles were merged.
"""
if len(indices) < 2:
return None
boxes = [quad_bbox(ocr[i][0]) for i in indices]
# Sort by Y-coordinate (top to bottom)
sorted_items = sorted(zip(indices, boxes), key=lambda x: x[1][1])
gaps = []
@@ -326,23 +321,19 @@ def split_bubble_if_multiple_rows(indices, ocr, bid=None):
if not gaps:
return None
# Find the largest vertical gap
max_gap_idx, max_gap_size, _, _ = max(gaps, key=lambda x: x[1])
# Calculate median line height to define what a "large" gap is
hs = [b[3] - b[1] for b in boxes]
med_h = float(np.median(hs)) if hs else 15.0
# If the vertical gap is more than 2.5x the height of a text line, it's a split!
threshold = med_h * 2.5
min_gap = 40.0 # Absolute minimum pixel gap to prevent micro-splits
min_gap = 40.0
if max_gap_size > threshold and max_gap_size > min_gap:
split_idx = max_gap_idx
top_indices = [item[0] for item in sorted_items[:split_idx]]
bottom_indices = [item[0] for item in sorted_items[split_idx:]]
# Ensure we don't just split off a single noise character
if len(top_indices) >= 1 and len(bottom_indices) >= 1:
return top_indices, bottom_indices
@@ -354,10 +345,26 @@ def split_bubble_if_multiple_rows(indices, ocr, bid=None):
# ============================================================
class MacVisionDetector:
    """Detector that selects the recognition language for Apple Vision OCR."""

    def __init__(self, source_lang="en"):
        """Resolve ``source_lang`` to the language code stored in ``self.langs``.

        Args:
            source_lang: Short code ("en", "zh", ...) or English language
                name ("english", "chinese", ...). Matching ignores case and
                surrounding whitespace. Unknown, empty or ``None`` values
                fall back to "en-US".
        """
        # Tolerate None/empty input so it falls through to the default
        # instead of raising on ``.lower()``.
        lang_key = (source_lang or "").lower().strip()

        # Short codes and English names both map to the same Vision code.
        lang_map = {
            "en": "en-US", "english": "en-US",
            "es": "es-ES", "spanish": "es-ES",
            "ca": "ca-ES", "catalan": "ca-ES",
            "fr": "fr-FR", "french": "fr-FR",
            "ja": "ja-JP", "japanese": "ja-JP",
            "it": "it-IT", "italian": "it-IT",
            "de": "de-DE", "german": "de-DE",
            "ko": "ko-KR", "korean": "ko-KR",
            # Simplified Chinese; "zh" must agree with "chinese" (it was
            # previously mapped to the Korean code by mistake).
            "zh": "zh-Hans", "chinese": "zh-Hans",
        }

        apple_lang = lang_map.get(lang_key, "en-US")
        self.langs = [apple_lang]
        print(f"⚡ Using Apple Vision OCR (Language: {self.langs[0]})")
def read(self, image_path_or_array):
if isinstance(image_path_or_array, str):
@@ -1108,8 +1115,8 @@ def translate_manga_text(
if __name__ == "__main__":
translate_manga_text(
image_path="004.png",
source_lang="en",
image_path="003.jpg",
source_lang="es",
target_lang="ca",
confidence_threshold=0.05,
min_text_length=1,
@@ -1118,6 +1125,6 @@ if __name__ == "__main__":
quality_threshold=0.62,
export_to_file="output.txt",
export_bubbles_to="bubbles.json",
reading_mode="ltr",
reading_mode="rtl", # Changed to RTL for Japanese Manga
debug=True
)