From a5c81f4ff070f849c92b945d8e1112c0d2300540 Mon Sep 17 00:00:00 2001
From: Guillem Hernandez Sola <guillem.hernandez.sola@gmail.com>
Date: Tue, 21 Apr 2026 21:27:22 +0200
Subject: [PATCH] Added new styles

---
 manga-translator.py | 113 +++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 101 insertions(+), 12 deletions(-)

diff --git a/manga-translator.py b/manga-translator.py
index 7916853..5939f6b 100644
--- a/manga-translator.py
+++ b/manga-translator.py
@@ -104,11 +104,9 @@ def looks_like_box_tag(t: str) -> bool:
 def is_noise_text(text: str) -> bool:
     t = (text or "").strip()
 
-    # ALLOW pure punctuation clusters like "...", "!!", "?!"
     if re.fullmatch(r"[\?\!\.]+", t):
         return False
 
-    # ALLOW single alphabetical characters (crucial for vertical text)
     if len(t) == 1 and t.isalpha():
         return False
 
@@ -117,7 +115,6 @@ def is_noise_text(text: str) -> bool:
     if looks_like_box_tag(t):
         return True
     
-    # Relaxed the length check to allow 1-2 letter words and punctuation
     if len(t) <= 2 and not re.search(r"[A-Z0-9\?\!\.]", t) and not t.isalpha():
         return True
 
@@ -199,12 +196,102 @@ def ocr_candidate_score(text: str) -> float:
 # ============================================================
 # SPLITTERS
 # ============================================================
+def split_wide_ocr_items(image_bgr, filtered_ocr):
+    """
+    Split OCR lines that Apple Vision merged across two text columns.
+
+    Each wide, multi-word (quad, text, conf) item is binarised and projected
+    onto the x-axis; the widest near-empty run of columns in its central 60%
+    is taken as a gutter only if clearly wider than a word space, and the
+    text is cut at the space nearest to it.
+
+    Returns (items, splits_made): split items are replaced by their halves.
+    """
+    new_filtered = []
+    splits_made = 0
+
+    for item in filtered_ocr:
+        quad, text, conf = item
+        x1, y1, x2, y2 = quad_bbox(quad)
+        w = x2 - x1
+        h = max(1, y2 - y1)
+
+        # Only abnormally wide lines that contain a space are candidates.
+        if w > h * 2.5 and len(text) > 5 and ' ' in text:
+            pad = 2
+            roi_y1 = max(0, y1 - pad)
+            roi_y2 = min(image_bgr.shape[0], y2 + pad)
+            roi_x1 = max(0, x1)
+            roi_x2 = min(image_bgr.shape[1], x2)
+
+            roi = image_bgr[roi_y1:roi_y2, roi_x1:roi_x2]
+            if roi.size > 0:
+                gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
+                _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
+                proj = np.sum(thresh, axis=0)
+
+                # Scan the central 60% of the ROI that was actually cropped:
+                # when the bbox is clipped at the image border the projection
+                # is narrower than w, and indexing it by w would overflow.
+                roi_w = proj.shape[0]
+                start_x = int(roi_w * 0.20)
+                end_x = int(roi_w * 0.80)
+
+                if start_x < end_x:
+                    char_w = w / max(1, len(text))
+                    # A real column gap is at least 2.5 chars wide or 75% of line height.
+                    min_gap_width = max(int(char_w * 2.5), int(h * 0.75))
+                    # Columns whose ink covers under 15% of the line height count as empty.
+                    gap_mask = proj < h * 255 * 0.15
+
+                    # Find the widest continuous run of empty columns (first one wins ties).
+                    best_gap_start, best_gap_len = -1, 0
+                    run_start, run_len = -1, 0
+                    for x_rel in range(start_x, end_x):
+                        if gap_mask[x_rel]:
+                            if run_len == 0:
+                                run_start = x_rel
+                            run_len += 1
+                            if run_len > best_gap_len:
+                                best_gap_start, best_gap_len = run_start, run_len
+                        else:
+                            run_len = 0
+
+                    # ONLY split on a real gutter; "> 0" rejects the no-gap case
+                    # when min_gap_width rounds down to zero on tiny boxes.
+                    if best_gap_len > 0 and best_gap_len >= min_gap_width:
+                        split_x = roi_x1 + best_gap_start + (best_gap_len // 2)
+                        split_idx = int((split_x - x1) / char_w)
+
+                        # Snap the cut to the nearest space when it is close enough.
+                        spaces = [i for i, c in enumerate(text) if c == ' ']
+                        if spaces:
+                            best_space = min(spaces, key=lambda i: abs(i - split_idx))
+                            if abs(best_space - split_idx) < len(text) * 0.35:
+                                split_idx = best_space
+
+                        text_left = text[:split_idx].strip()
+                        text_right = text[split_idx:].strip()
+
+                        if text_left and text_right:
+                            quad_left = [[x1, y1], [split_x, y1], [split_x, y2], [x1, y2]]
+                            quad_right = [[split_x, y1], [x2, y1], [x2, y2], [split_x, y2]]
+                            new_filtered.append((quad_left, text_left, conf))
+                            new_filtered.append((quad_right, text_right, conf))
+                            splits_made += 1
+                            continue
+
+        # If no split was made, keep the original item
+        new_filtered.append(item)
+
+    return new_filtered, splits_made
+
 def split_panel_box(image_bgr, bbox_xyxy, bubble_quads=None):
     x1, y1, x2, y2 = bbox_xyxy
     w = x2 - x1
     h = y2 - y1
     
-    if bubble_quads is not None and len(bubble_quads) < 10:
+    if bubble_quads is not None and len(bubble_quads) < 4:
         return None
     
     if w < 50 or h < 50:
@@ -345,10 +432,8 @@ def split_bubble_if_multiple_rows(indices, ocr, bid=None):
 # ============================================================
 class MacVisionDetector:
     def __init__(self, source_lang="en"):
-        # 1. Normalize the input language string
         lang_key = source_lang.lower().strip()
         
-        # 2. Comprehensive mapping to Apple Vision BCP-47 language codes
         lang_map = {
             "en": "en-US", "english": "en-US",
             "es": "es-ES", "spanish": "es-ES",
@@ -358,10 +443,9 @@ class MacVisionDetector:
             "it": "it-IT", "italian": "it-IT",
             "de": "de-DE", "german": "de-DE",
             "ko": "ko-KR", "korean": "ko-KR",
-            "zh": "ko-KR", "chinese": "zh-Hans" # Simplified Chinese
+            "zh": "zh-Hans", "chinese": "zh-Hans"
         }
         
-        # 3. Resolve the language code
         apple_lang = lang_map.get(lang_key, "en-US")
         self.langs = [apple_lang]
         print(f"⚡ Using Apple Vision OCR (Language: {self.langs[0]})")
@@ -929,6 +1013,11 @@ def translate_manga_text(
         print("⚠️ No text after filtering.")
         return
 
+    # --- NEW: Split wide OCR items across column gaps ---
+    filtered, splits_made = split_wide_ocr_items(image, filtered)
+    if splits_made > 0:
+        print(f"✂️  Split {splits_made} wide OCR lines across column gaps.")
+
     bubbles, bubble_boxes, bubble_quads, bubble_indices = group_tokens(
         filtered, image.shape, gap_px=resolved_gap, bbox_padding=3
     )
@@ -957,7 +1046,7 @@ def translate_manga_text(
             if left_idxs and right_idxs:
                 bubble_split = (left_idxs, right_idxs)
                 splits_performed.append(f"BOX#{bid} (panel border at x={split_x})")
-            elif len(bubble_quads[bid]) >= 10:
+            elif len(bubble_quads[bid]) >= 4:
                 col_split = split_bubble_if_multiple_columns(bubble_indices[bid], filtered, bid=bid, use_aggressive_thresholds=True)
                 if col_split:
                     left_idxs, right_idxs = col_split
@@ -1115,8 +1204,8 @@ def translate_manga_text(
 
 if __name__ == "__main__":
     translate_manga_text(
-        image_path="003.jpg",
-        source_lang="es",
+        image_path="09.jpg",
+        source_lang="english", 
         target_lang="ca",
         confidence_threshold=0.05,
         min_text_length=1,
@@ -1125,6 +1214,6 @@ if __name__ == "__main__":
         quality_threshold=0.62,
         export_to_file="output.txt",
         export_bubbles_to="bubbles.json",
-        reading_mode="rtl", # Changed to RTL for Japanese Manga
+        reading_mode="rtl", 
         debug=True
     )
\ No newline at end of file