Added all
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -23,6 +23,7 @@ Icon
|
||||
*.jpg
|
||||
*.jpeg
|
||||
*.json
|
||||
*.webp
|
||||
|
||||
# Files that might appear in the root of a volume
|
||||
.DocumentRevisions-V100
|
||||
|
||||
@@ -302,15 +302,10 @@ def split_bubble_if_multiple_columns(indices, ocr, bid=None, use_aggressive_thre
|
||||
|
||||
|
||||
def split_bubble_if_multiple_rows(indices, ocr, bid=None):
|
||||
"""
|
||||
Splits a bubble horizontally (top/bottom) if there is a massive vertical gap
|
||||
between text lines, indicating two separate bubbles were merged.
|
||||
"""
|
||||
if len(indices) < 2:
|
||||
return None
|
||||
|
||||
boxes = [quad_bbox(ocr[i][0]) for i in indices]
|
||||
# Sort by Y-coordinate (top to bottom)
|
||||
sorted_items = sorted(zip(indices, boxes), key=lambda x: x[1][1])
|
||||
|
||||
gaps = []
|
||||
@@ -326,23 +321,19 @@ def split_bubble_if_multiple_rows(indices, ocr, bid=None):
|
||||
if not gaps:
|
||||
return None
|
||||
|
||||
# Find the largest vertical gap
|
||||
max_gap_idx, max_gap_size, _, _ = max(gaps, key=lambda x: x[1])
|
||||
|
||||
# Calculate median line height to define what a "large" gap is
|
||||
hs = [b[3] - b[1] for b in boxes]
|
||||
med_h = float(np.median(hs)) if hs else 15.0
|
||||
|
||||
# If the vertical gap is more than 2.5x the height of a text line, it's a split!
|
||||
threshold = med_h * 2.5
|
||||
min_gap = 40.0 # Absolute minimum pixel gap to prevent micro-splits
|
||||
min_gap = 40.0
|
||||
|
||||
if max_gap_size > threshold and max_gap_size > min_gap:
|
||||
split_idx = max_gap_idx
|
||||
top_indices = [item[0] for item in sorted_items[:split_idx]]
|
||||
bottom_indices = [item[0] for item in sorted_items[split_idx:]]
|
||||
|
||||
# Ensure we don't just split off a single noise character
|
||||
if len(top_indices) >= 1 and len(bottom_indices) >= 1:
|
||||
return top_indices, bottom_indices
|
||||
|
||||
@@ -354,10 +345,26 @@ def split_bubble_if_multiple_rows(indices, ocr, bid=None):
|
||||
# ============================================================
|
||||
class MacVisionDetector:
|
||||
def __init__(self, source_lang="en"):
|
||||
lang_map = {"en": "en-US", "es": "es-ES", "ca": "ca-ES", "fr": "fr-FR", "ja": "ja-JP"}
|
||||
apple_lang = lang_map.get(source_lang, "en-US")
|
||||
# 1. Normalize the input language string
|
||||
lang_key = source_lang.lower().strip()
|
||||
|
||||
# 2. Comprehensive mapping to Apple Vision BCP-47 language codes
|
||||
lang_map = {
|
||||
"en": "en-US", "english": "en-US",
|
||||
"es": "es-ES", "spanish": "es-ES",
|
||||
"ca": "ca-ES", "catalan": "ca-ES",
|
||||
"fr": "fr-FR", "french": "fr-FR",
|
||||
"ja": "ja-JP", "japanese": "ja-JP",
|
||||
"it": "it-IT", "italian": "it-IT",
|
||||
"de": "de-DE", "german": "de-DE",
|
||||
"ko": "ko-KR", "korean": "ko-KR",
|
||||
"zh": "zh-Hans", "chinese": "zh-Hans" # Simplified Chinese
|
||||
}
|
||||
|
||||
# 3. Resolve the language code
|
||||
apple_lang = lang_map.get(lang_key, "en-US")
|
||||
self.langs = [apple_lang]
|
||||
print(f"⚡ Using Apple Vision OCR (Language: {self.langs})")
|
||||
print(f"⚡ Using Apple Vision OCR (Language: {self.langs[0]})")
|
||||
|
||||
def read(self, image_path_or_array):
|
||||
if isinstance(image_path_or_array, str):
|
||||
@@ -1108,8 +1115,8 @@ def translate_manga_text(
|
||||
|
||||
if __name__ == "__main__":
|
||||
translate_manga_text(
|
||||
image_path="004.png",
|
||||
source_lang="en",
|
||||
image_path="003.jpg",
|
||||
source_lang="es",
|
||||
target_lang="ca",
|
||||
confidence_threshold=0.05,
|
||||
min_text_length=1,
|
||||
@@ -1118,6 +1125,6 @@ if __name__ == "__main__":
|
||||
quality_threshold=0.62,
|
||||
export_to_file="output.txt",
|
||||
export_bubbles_to="bubbles.json",
|
||||
reading_mode="ltr",
|
||||
reading_mode="rtl", # Changed to RTL for Japanese Manga
|
||||
debug=True
|
||||
)
|
||||
)
|
||||
Reference in New Issue
Block a user