diff --git a/manga-translator.py b/manga-translator.py index aba54ef..1ec7a23 100644 --- a/manga-translator.py +++ b/manga-translator.py @@ -2780,29 +2780,11 @@ def process_manga_page(image_path: str, # Bubble groups (lines as rendered in the bubble) bubble_groups = build_text_from_layout(indices, ocr) - # ── Step 12.5: Detect Background Complexity ─────────── - # We must mask out the dark text first, otherwise the text edges will trick the detector! - x1, y1, x2, y2 = adjusted_box_xyxy - roi = image_bgr[y1:y2, x1:x2] - if roi.size > 0: - gray_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) - # Mask out the text (assume text is dark, < 120) - _, text_mask = cv2.threshold(gray_roi, 120, 255, cv2.THRESH_BINARY_INV) - # Dilate text mask slightly to cover anti-aliasing edges - text_mask = cv2.dilate(text_mask, np.ones((3,3), np.uint8), iterations=1) - - # Get background pixels (where text_mask is 0) - bg_pixels = gray_roi[text_mask == 0] - - if len(bg_pixels) > 0: - # Check ratio of white pixels (> 220) in the background - whiteness_ratio = np.sum(bg_pixels > 220) / len(bg_pixels) - if whiteness_ratio > 0.85: - bg_type = "white" - else: - bg_type = "complex" - else: - bg_type = "complex" + # ── Step 12.5: Detect Background Complexity ─────────── + # Analyze the pixels to see if it's a plain white bubble or complex artwork + feats = contour_features_for_box(image_bgr, adjusted_box_xyxy) + if feats["whiteness_ratio"] > 0.75 and feats["edge_density"] < 0.10: + bg_type = "white" else: bg_type = "complex" @@ -2818,8 +2800,38 @@ def process_manga_page(image_path: str, print(f" ⚠️ Translation failed for BOX#{bid}: {e}") translated = corrected_text - # 👉 FIX: Use " / " instead of " || " to prevent breaking the output.txt columns - bubble_groups_str = " / ".join(bubble_groups) if bubble_groups else corrected_text + # Segment bubble_groups into || separated string for output + bubble_groups_str = " || ".join(bubble_groups) if bubble_groups else corrected_text + + # Determine OCR source label + ocr_source = "vision-base" + if correction_gain > 0.05: + ocr_source = "vision-reread" + + # Add BUBBLE / SEGMENTED flags + if bubble_groups and len(bubble_groups) > 1: + if "BUBBLE" not in flags: + flags.append("BUBBLE") + if "SEGMENTED" not in flags: + flags.append("SEGMENTED") + + results[str(bid)] = { + "order": order_idx, + "region_type": region_type, + "background_type": bg_type, # <--- NEW FLAG ADDED HERE + "confidence": round(conf, 4), + "ocr_source": ocr_source, + "raw_ocr": raw_text, + "corrected_ocr": corrected_text, + "translation_input": translation_input, + "translated": translated, + "flags": flags, + "bubble_groups": bubble_groups, + "box": xyxy_to_xywh(adjusted_box_xyxy), + "lines": bubble_groups, + } + + print(f"\n ✅ Processed {len(results)} text region(s).") # ── Step 14: Write outputs ──────────────────────────────── if output_json: @@ -2841,6 +2853,7 @@ def _write_json_output(results: Dict[str, Any], path: str) -> None: except Exception as e: print(f" ⚠️ Failed to write JSON: {e}") + def _write_txt_output(results: Dict[str, Any], path: str) -> None: sep = "─" * 120 lines = [ @@ -2849,8 +2862,7 @@ def _write_txt_output(results: Dict[str, Any], path: str) -> None: ] for bid, data in sorted(results.items(), key=lambda kv: kv[1]["order"]): - # Use " / " instead of " || " to prevent breaking the columns - bubble_groups_str = " / ".join(data.get("bubble_groups", [])) + bubble_groups_str = " || ".join(data.get("bubble_groups", [])) flags_str = ",".join(data.get("flags", [])) row = ( f"#{bid}" @@ -2873,6 +2885,7 @@ def _write_txt_output(results: Dict[str, Any], path: str) -> None: except Exception as e: print(f" ⚠️ Failed to write TXT: {e}") + # ============================================================ # DEBUG VISUALISER # ============================================================ @@ -2947,6 +2960,7 @@ def draw_debug_clusters(image_bgr: np.ndarray, return vis + # ============================================================ # CLI ENTRY POINT # ============================================================ @@ -2971,15 +2985,7 @@ def main(): base = os.path.splitext(args.image)[0] json_out = args.json or f"{base}_bubbles.json" txt_out = args.txt or f"{base}_output.txt" - - # 👉 FIX: Automatically save debug_clusters.png in the same folder as bubbles.json - if args.debug: - debug_out = args.debug - elif args.json: - out_dir = os.path.dirname(args.json) - debug_out = os.path.join(out_dir, "debug_clusters.png") - else: - debug_out = f"{base}_debug_clusters.png" + debug_out = args.debug or f"{base}_debug_clusters.png" results = process_manga_page( image_path = args.image, @@ -3033,5 +3039,6 @@ def main(): ) print(f"{'='*60}\n") + if __name__ == "__main__": main() \ No newline at end of file