Add vertical and horizontal gap splitting for OCR text boxes (steps 9.5 and 9.6)

2026-04-23 18:34:13 +02:00
parent 2f61814971
commit 37bdc25bf6
1 changed files with 192 additions and 0 deletions
--- a/manga-translator.py
+++ b/manga-translator.py
@@ -519,6 +519,186 @@ def build_region_flags(raw_text, corrected_text, region_type, conf):
 # ============================================================
 # HELPERS
 # ============================================================
+def split_boxes_by_horizontal_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=1.5):
+    """
+    Splits a single bounding box into multiple boxes if there is a large horizontal
+    gap between columns of text (e.g., side-by-side speech bubbles).
+
+    Args:
+        out_boxes: dict mapping box id -> bounding box.
+        out_indices: dict mapping box id -> list of indices into ``ocr``.
+        out_quads: dict mapping box id -> list of quads belonging to the box.
+        out_lines: dict mapping box id -> list of texts, paired positionally
+            with ``out_indices[bid]``.
+        ocr: indexable OCR results; ``ocr[idx][0]`` is read as the quad, a
+            sequence of (x, y) points.
+        gap_multiplier: a horizontal gap wider than this many average line
+            heights starts a new column.
+
+    Returns:
+        ``(new_lines, new_boxes, new_quads, new_indices)``. Unsplit boxes keep
+        their id and values. Each column of a split box is stored under a fresh
+        id (starting at ``max(out_boxes) + 1``) with lines ordered top to bottom
+        and a box of ``(min_x, min_y, max_x, max_y)``; the old id is dropped.
+    """
+    new_boxes, new_indices, new_quads, new_lines = {}, {}, {}, {}
+    next_bid = max(out_boxes) + 1 if out_boxes else 1
+
+    for bid, box in out_boxes.items():
+        indices = out_indices[bid]
+        lines = out_lines[bid]
+
+        # Pass the box through untouched when there is nothing to split, or
+        # when texts and indices cannot be paired one-to-one: zip() would
+        # silently drop the surplus entries (losing OCR indices in the split
+        # boxes) and an empty pairing would divide by zero below.
+        if len(indices) < 2 or len(lines) != len(indices):
+            new_boxes[bid] = box
+            new_indices[bid] = indices
+            new_quads[bid] = out_quads[bid]
+            new_lines[bid] = lines
+            continue
+
+        # Extract bounding coordinates for each text line
+        line_data = []
+        for idx, text in zip(indices, lines):
+            quad = ocr[idx][0]
+            xs = [p[0] for p in quad]
+            ys = [p[1] for p in quad]
+            line_data.append({
+                "idx": idx, "text": text, "quad": quad,
+                "min_x": min(xs), "max_x": max(xs),
+                "min_y": min(ys), "max_y": max(ys),
+            })
+
+        # Average line height scales the threshold with the text size
+        avg_height = sum(ld["max_y"] - ld["min_y"] for ld in line_data) / len(line_data)
+        gap_threshold = avg_height * gap_multiplier
+
+        # Sweep left to right, tracking the right edge of the current column so
+        # a short line cannot hide the extent of a wider one before it
+        line_data.sort(key=lambda ld: ld["min_x"])
+        columns = [[line_data[0]]]
+        current_max_x = line_data[0]["max_x"]
+        for curr_line in line_data[1:]:
+            if curr_line["min_x"] - current_max_x > gap_threshold:
+                # Gap is too large: start a new column here
+                columns.append([curr_line])
+                current_max_x = curr_line["max_x"]
+            else:
+                columns[-1].append(curr_line)
+                current_max_x = max(current_max_x, curr_line["max_x"])
+
+        if len(columns) == 1:
+            # No split happened
+            new_boxes[bid] = box
+            new_indices[bid] = indices
+            new_quads[bid] = out_quads[bid]
+            new_lines[bid] = lines
+            continue
+
+        # Box was split horizontally: one new entry per column
+        for col in columns:
+            # Restore reading order inside the column (top to bottom)
+            col.sort(key=lambda ld: ld["min_y"])
+            c_quads = [ld["quad"] for ld in col]
+            all_xs = [p[0] for q in c_quads for p in q]
+            all_ys = [p[1] for q in c_quads for p in q]
+
+            new_boxes[next_bid] = (min(all_xs), min(all_ys), max(all_xs), max(all_ys))
+            new_indices[next_bid] = [ld["idx"] for ld in col]
+            new_quads[next_bid] = c_quads
+            new_lines[next_bid] = [ld["text"] for ld in col]
+            next_bid += 1
+
+    return new_lines, new_boxes, new_quads, new_indices
+
+def split_boxes_by_vertical_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=2.5):
+    """
+    Splits a single bounding box into multiple boxes if there is a large vertical
+    gap between the text lines.
+
+    Args:
+        out_boxes: dict mapping box id -> bounding box.
+        out_indices: dict mapping box id -> list of indices into ``ocr``.
+        out_quads: dict mapping box id -> list of quads belonging to the box.
+        out_lines: dict mapping box id -> list of texts, paired positionally
+            with ``out_indices[bid]``.
+        ocr: indexable OCR results; ``ocr[idx][0]`` is read as the quad,
+            [[x1,y1], [x2,y2], [x3,y3], [x4,y4]].
+        gap_multiplier: a vertical gap taller than this many average line
+            heights starts a new cluster.
+
+    Returns:
+        ``(new_lines, new_boxes, new_quads, new_indices)``. Unsplit boxes keep
+        their id and values. Each cluster of a split box is stored under a
+        fresh id (starting at ``max(out_boxes) + 1``), lines ordered top to
+        bottom, with a ``(min_x, min_y, max_x, max_y)`` box; the old id is dropped.
+    """
+    new_boxes, new_indices, new_quads, new_lines = {}, {}, {}, {}
+    next_bid = max(out_boxes) + 1 if out_boxes else 1
+
+    for bid, box in out_boxes.items():
+        indices = out_indices[bid]
+        lines = out_lines[bid]
+
+        # Pass the box through untouched when there is nothing to split, or
+        # when texts and indices cannot be paired one-to-one: zip() would
+        # silently drop surplus entries and an empty pairing divides by zero.
+        if len(indices) < 2 or len(lines) != len(indices):
+            new_boxes[bid] = box
+            new_indices[bid] = indices
+            new_quads[bid] = out_quads[bid]
+            new_lines[bid] = lines
+            continue
+
+        # Vertical extent of each text line, taken from its OCR quad
+        line_data = []
+        for idx, text in zip(indices, lines):
+            quad = ocr[idx][0]
+            ys = [p[1] for p in quad]
+            line_data.append({"idx": idx, "text": text, "min_y": min(ys), "max_y": max(ys), "quad": quad})
+        line_data.sort(key=lambda ld: ld["min_y"])
+
+        # Average line height scales the threshold with the text size
+        avg_height = sum(ld["max_y"] - ld["min_y"] for ld in line_data) / len(line_data)
+        gap_threshold = avg_height * gap_multiplier
+
+        # Sweep top to bottom, measuring each gap from the lowest bottom edge
+        # seen so far in the cluster. Measuring from the previous line alone
+        # would split off a line that still overlaps an earlier, taller line.
+        clusters = [[line_data[0]]]
+        current_max_y = line_data[0]["max_y"]
+        for curr_line in line_data[1:]:
+            if curr_line["min_y"] - current_max_y > gap_threshold:
+                # Gap is too large: split here
+                clusters.append([curr_line])
+                current_max_y = curr_line["max_y"]
+            else:
+                clusters[-1].append(curr_line)
+                current_max_y = max(current_max_y, curr_line["max_y"])
+
+        if len(clusters) == 1:
+            # No split happened
+            new_boxes[bid] = box
+            new_indices[bid] = indices
+            new_quads[bid] = out_quads[bid]
+            new_lines[bid] = lines
+            continue
+
+        # Box was split: one new entry per cluster
+        for cluster in clusters:
+            c_quads = [ld["quad"] for ld in cluster]
+            all_xs = [p[0] for q in c_quads for p in q]
+            all_ys = [p[1] for q in c_quads for p in q]
+            new_boxes[next_bid] = (min(all_xs), min(all_ys), max(all_xs), max(all_ys))
+            new_indices[next_bid] = [ld["idx"] for ld in cluster]
+            new_quads[next_bid] = c_quads
+            new_lines[next_bid] = [ld["text"] for ld in cluster]
+            next_bid += 1
+
+    return new_lines, new_boxes, new_quads, new_indices
+
 def normalize_text(text: str) -> str:
    t = (text or "").strip().upper()
    t = t.replace("\u201c", "\"").replace("\u201d", "\"")
@@ -2506,6 +2686,18 @@ def process_manga_page(image_path: str,

    print(f"   Boxes after dedup: {len(out_boxes)}")

+    # ── Step 9.5: Split boxes with large vertical gaps ────────
+    out_lines, out_boxes, out_quads, out_indices = \
+        split_boxes_by_vertical_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=2.5)
+
+    print(f"   Boxes after vertical gap split: {len(out_boxes)}")
+
+    # ── Step 9.6: Split boxes with large horizontal gaps ──────
+    out_lines, out_boxes, out_quads, out_indices = \
+        split_boxes_by_horizontal_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=1.5)
+
+    print(f"   Boxes after horizontal gap split: {len(out_boxes)}")
+    
    # ── Step 10: Enforce max box size ─────────────────────────
    out_lines, out_boxes, out_quads, out_indices = \
        enforce_max_box_size(out_boxes, out_indices, out_quads, out_lines,