Added all

2026-04-23 19:11:02 +02:00
parent 37bdc25bf6
commit fd0339d8ca
3 changed files with 382 additions and 4 deletions
--- a/batch-clean.sh
+++ b/batch-clean.sh
@@ -0,0 +1,228 @@
 #!/usr/bin/env bash
 # ============================================================
 # batch-clean.sh
 # Batch manga text removal (inpainting) using bubbles.json
 #
 # Usage:
 #   ./batch-clean.sh <folder>
 #   ./batch-clean.sh <folder> --start 3 --end 7
 #
 # Output per page lands in:
 #   <folder>/translated/<page_stem>/
 #     └── <page_stem>_cleaned.png
 # ============================================================
 set -uo pipefail
 # ─────────────────────────────────────────────────────────────
 # CONFIGURATION
 # ─────────────────────────────────────────────────────────────
 # First page to process, as a 1-based position in the sorted image list
 # (overridden by --start).
 START_PAGE=1
 # Last page to process; the large default means "no upper limit"
 # (overridden by --end).
 END_PAGE=999999
 # Interpreter used to launch the cleaner script (overridden by --python).
 PYTHON_BIN="python"
 # Absolute directory containing this script, so the cleaner is located
 # relative to the script rather than the caller's working directory.
 SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # Python helper that is invoked once per page to remove the text.
 CLEANER="${SCRIPT_DIR}/clean_bubbles.py"
 # ─────────────────────────────────────────────────────────────
 # COLOURS
 # ─────────────────────────────────────────────────────────────
 # ANSI escape sequences used by the log helpers (expanded by `echo -e`).
 # They are only enabled when stdout is a terminal and NO_COLOR is unset, so
 # piping or redirecting the output to a file does not fill it with raw
 # escape codes. When disabled, every variable is the empty string and the
 # helpers print plain text.
 if [[ -t 1 && -z "${NO_COLOR:-}" ]]; then
    RED='\033[0;31m'
    GREEN='\033[0;32m'
    YELLOW='\033[1;33m'
    CYAN='\033[0;36m'
    BOLD='\033[1m'
    RESET='\033[0m'
 else
    RED=''
    GREEN=''
    YELLOW=''
    CYAN=''
    BOLD=''
    RESET=''
 fi
 # ─────────────────────────────────────────────────────────────
 # HELPERS
 # ─────────────────────────────────────────────────────────────
 # Print the help text (synopsis, options and examples) to stdout.
 # Globals: BOLD, RESET (read) — escape sequences around the section titles.
 usage() {
    local self="$0"

    printf '\n'
    printf '%b\n' "${BOLD}Usage:${RESET}"
    printf '%s\n' \
        "  ${self} <folder> [options]" \
        ""
    printf '%b\n' "${BOLD}Options:${RESET}"
    printf '%s\n' \
        "  --start         First page number     (default: 1)" \
        "  --end           Last  page number     (default: all)" \
        "  --python        Python binary         (default: python)" \
        "  --help,    -h   Show this help" \
        ""
    printf '%b\n' "${BOLD}Examples:${RESET}"
    printf '%s\n' \
        "  ${self} pages-for-tests" \
        "  ${self} pages-for-tests --start 3 --end 7" \
        ""
 }
 # Status-line helpers. Each prints its arguments (joined by spaces) on one
 # line to stdout, prefixed with an icon and wrapped in a colour; backslash
 # escapes in the message are expanded, as with `echo -e`.
 # Globals: CYAN, GREEN, YELLOW, RED, BOLD, RESET (read).
 log_info() {
    printf '%b\n' "${CYAN}ℹ️  $*${RESET}"
 }
 log_ok() {
    printf '%b\n' "${GREEN}✅  $*${RESET}"
 }
 log_warn() {
    printf '%b\n' "${YELLOW}⚠️  $*${RESET}"
 }
 log_error() {
    printf '%b\n' "${RED}❌  $*${RESET}"
 }
 # Print a banner: a blank line, a rule, the title, and a closing rule.
 log_section() {
    local rule="══════════════════════════════════════════"
    local tint="${BOLD}${CYAN}"

    printf '%b\n' "\n${tint}${rule}${RESET}"
    printf '%b\n' "${tint}  🧹  $*${RESET}"
    printf '%b\n' "${tint}${rule}${RESET}"
 }
 # ─────────────────────────────────────────────────────────────
 # ARGUMENT PARSING
 # ─────────────────────────────────────────────────────────────
 # Command line: the first positional argument is the page folder, followed
 # by optional --start / --end / --python overrides of the defaults above.
 if [[ $# -eq 0 ]]; then
    log_error "No folder specified."
    usage
    exit 1
 fi
 # Let the user ask for help without naming a folder; otherwise "--help"
 # would be taken as the folder name and rejected as "Folder not found".
 if [[ "$1" == "--help" || "$1" == "-h" ]]; then
    usage
    exit 0
 fi
 FOLDER="$1"
 shift
 while [[ $# -gt 0 ]]; do
    case "$1" in
        --start|--end|--python)
            # Under `set -u` a missing value would abort with an opaque
            # "unbound variable" message, so report it explicitly.
            if [[ $# -lt 2 ]]; then
                log_error "Option $1 requires a value."
                usage
                exit 1
            fi
            case "$1" in
                --start)  START_PAGE="$2" ;;
                --end)    END_PAGE="$2" ;;
                --python) PYTHON_BIN="$2" ;;
            esac
            shift 2
            ;;
        --help|-h)    usage; exit 0 ;;
        *)
            log_error "Unknown option: $1"
            usage
            exit 1
            ;;
    esac
 done
 # The page bounds are later evaluated in arithmetic comparisons, where a
 # non-numeric value either errors out or is evaluated as an expression.
 # Accept plain decimal digits only.
 _int_re='^[0-9]+$'
 if [[ ! "$START_PAGE" =~ $_int_re ]]; then
    log_error "--start must be a whole number, got: $START_PAGE"
    exit 1
 fi
 if [[ ! "$END_PAGE" =~ $_int_re ]]; then
    log_error "--end must be a whole number, got: $END_PAGE"
    exit 1
 fi
 # Force base 10 so zero-padded input such as 08 is not parsed as octal.
 START_PAGE=$(( 10#$START_PAGE ))
 END_PAGE=$(( 10#$END_PAGE ))
 # Pages are numbered from 1; treat a smaller start as "from the first page".
 if (( START_PAGE < 1 )); then
    START_PAGE=1
 fi
 # ─────────────────────────────────────────────────────────────
 # VALIDATION
 # ─────────────────────────────────────────────────────────────
 # Abort before doing any work when a prerequisite is missing: the page
 # folder, the Python cleaner next to this script, or the interpreter.
 [[ -d "$FOLDER" ]] || {
    log_error "Folder not found: $FOLDER"
    exit 1
 }
 [[ -f "$CLEANER" ]] || {
    log_error "clean_bubbles.py not found at: $CLEANER"
    exit 1
 }
 command -v "$PYTHON_BIN" &>/dev/null || {
    log_error "Python binary not found: $PYTHON_BIN"
    log_error "Try --python python3"
    exit 1
 }
 # ─────────────────────────────────────────────────────────────
 # DISCOVER IMAGES
 # ─────────────────────────────────────────────────────────────
 # Collect the image files that sit directly inside FOLDER (no recursion),
 # matching the extensions case-insensitively. Paths travel NUL-delimited so
 # names containing spaces or newlines survive intact.
 # NOTE(review): `sort -z` compares names as text, so "page10" sorts before
 # "page2" unless the file names are zero-padded.
 ALL_IMAGES=()
 while IFS= read -r -d '' image_path; do
    ALL_IMAGES[${#ALL_IMAGES[@]}]="$image_path"
 done < <(
    find "$FOLDER" -maxdepth 1 -type f \
        \( -iname "*.jpg" -o -iname "*.jpeg" -o -iname "*.png" -o -iname "*.webp" \) \
        -print0 \
        | sort -z
 )
 TOTAL="${#ALL_IMAGES[@]}"
 if (( TOTAL == 0 )); then
    log_error "No image files found in: $FOLDER"
    exit 1
 fi
 # ─────────────────────────────────────────────────────────────
 # SLICE TO REQUESTED PAGE RANGE
 # ─────────────────────────────────────────────────────────────
 # Keep only the images whose 1-based position in ALL_IMAGES lies within
 # [START_PAGE, END_PAGE]; fail when the range selects nothing.
 PAGES=()
 for (( idx = 0; idx < ${#ALL_IMAGES[@]}; idx++ )); do
    position=$(( idx + 1 ))
    if (( position >= START_PAGE && position <= END_PAGE )); then
        PAGES+=("${ALL_IMAGES[idx]}")
    fi
 done
 if (( ${#PAGES[@]} == 0 )); then
    log_error "No pages in range [${START_PAGE}, ${END_PAGE}] (total: ${TOTAL})"
    exit 1
 fi
 # ─────────────────────────────────────────────────────────────
 # SUMMARY HEADER
 # ─────────────────────────────────────────────────────────────
 # Announce what is about to be processed: resolved folder, number of
 # selected pages out of the total, and the requested page range.
 log_section "BATCH MANGA CLEANER"
 folder_abs="$(realpath "$FOLDER")"
 selected_count="${#PAGES[@]}"
 log_info "📂  Folder   : ${folder_abs}"
 log_info "📄  Pages    : ${selected_count} of ${TOTAL} total"
 log_info "🔢  Range    : ${START_PAGE} → ${END_PAGE}"
 printf '\n'
 # ─────────────────────────────────────────────────────────────
 # PROCESS EACH PAGE
 # ─────────────────────────────────────────────────────────────
 # Run the cleaner once per selected page and tally the outcome.
 #   PASS      — pages whose cleaned image was produced
 #   FAIL      — pages skipped (no bubbles.json) or whose cleaner run failed
 #   FAIL_LIST — "<stem> (<reason>)" for every failed page
 PASS=0
 FAIL=0
 FAIL_LIST=()
 # The selection loop keeps positions >= START_PAGE, and positions start at
 # 1, so the first selected page is never numbered below 1. Without this
 # clamp the label shown for each page is wrong when --start is 0 or less.
 FIRST_PAGE=$(( START_PAGE < 1 ? 1 : START_PAGE ))
 for i in "${!PAGES[@]}"; do
    IMAGE="${PAGES[$i]}"
    PAGE_NUM=$(( FIRST_PAGE + i ))
    # File name without directory and without its last extension; it names
    # both the per-page work directory and the output image.
    STEM="$(basename "${IMAGE%.*}")"
    WORKDIR="${FOLDER}/translated/${STEM}"
    echo ""
    echo -e "${BOLD}──────────────────────────────────────────${RESET}"
    echo -e "${BOLD}  🖼️  [${PAGE_NUM}/${TOTAL}]  ${STEM}${RESET}"
    echo -e "${BOLD}──────────────────────────────────────────${RESET}"
    OUTPUT_JSON="${WORKDIR}/bubbles.json"
    OUTPUT_CLEANED="${WORKDIR}/${STEM}_cleaned.png"
    # The cleaner needs the page's bubbles.json; a page without one is
    # counted as failed and the batch moves on.
    if [[ ! -f "$OUTPUT_JSON" ]]; then
        log_warn "Skipping: bubbles.json not found in ${WORKDIR}"
        FAIL=$(( FAIL + 1 ))
        FAIL_LIST+=("${STEM} (No JSON)")
        continue
    fi
    log_info "🗂️  Image : $(basename "$IMAGE")"
    log_info "🧹  Cleaning text..."
    # ── Run the cleaner ───────────────────────────────────────
    if "$PYTHON_BIN" "$CLEANER" \
            -i "$IMAGE" \
            -j "$OUTPUT_JSON" \
            -o "$OUTPUT_CLEANED"; then
        # A zero exit status is not trusted on its own: the output file
        # must actually exist for the page to count as passed.
        if [[ -f "$OUTPUT_CLEANED" ]]; then
            log_ok "Cleaned image saved → ${STEM}_cleaned.png"
            PASS=$(( PASS + 1 ))
        else
            log_error "Script ran but output image is missing."
            FAIL=$(( FAIL + 1 ))
            FAIL_LIST+=("${STEM} (Missing Output)")
        fi
    else
        log_error "Page ${PAGE_NUM} FAILED — check output above."
        FAIL=$(( FAIL + 1 ))
        FAIL_LIST+=("${STEM} (Script Error)")
    fi
 done
 # ─────────────────────────────────────────────────────────────
 # FINAL SUMMARY
 # ─────────────────────────────────────────────────────────────
 # Report the pass/fail totals, list every failed page, show where the
 # output lives, and exit 0 only when no page failed.
 log_section "BATCH CLEANING COMPLETE"
 printf '%b\n' "  ✅  ${GREEN}Passed : ${PASS}${RESET}"
 printf '%b\n' "  ❌  ${RED}Failed : ${FAIL}${RESET}"
 if (( ${#FAIL_LIST[@]} > 0 )); then
    printf '\n'
    log_warn "Failed pages:"
    for failed_page in "${FAIL_LIST[@]}"; do
        printf '%b\n' "    ❌  ${RED}${failed_page}${RESET}"
    done
 fi
 printf '\n'
 log_info "📦  Output folder: $(realpath "${FOLDER}/translated")"
 printf '\n'
 if (( FAIL == 0 )); then
    exit 0
 fi
 exit 1
--- a/clean_bubbles.py
+++ b/clean_bubbles.py
@@ -0,0 +1,109 @@
 import cv2
 import numpy as np
 import json
 import os
 import argparse
 def clean_text_from_box(image_bgr, box, bg_type="complex"):
    """
    Erase the text inside one bounding box of ``image_bgr`` (modified in place).

    The box is first intersected with the image, so a box that is partly
    outside is clipped (not shifted) and a box that is empty or entirely
    outside leaves the image untouched.

    Parameters
    ----------
    image_bgr : numpy.ndarray
        Image array indexed as [row, column, ...]; the inpainting path
        converts it with ``cv2.COLOR_BGR2GRAY``.
    box : mapping
        Must provide "x", "y", "w" and "h" (top-left corner and size in
        pixels); each value is converted with ``int()``.
    bg_type : str
        "white" fills the box with solid white; any other value inpaints
        the dark pixels found inside the box.

    Returns
    -------
    numpy.ndarray
        The same ``image_bgr`` object that was passed in.
    """
    img_h, img_w = image_bgr.shape[:2]
    x, y = int(box["x"]), int(box["y"])
    w, h = int(box["w"]), int(box["h"])

    # Clip both corners to the image. Clamping only the origin would keep
    # the full width/height and slide the box instead of cropping it.
    x0 = min(max(x, 0), img_w)
    y0 = min(max(y, 0), img_h)
    x1 = min(max(x + w, 0), img_w)
    y1 = min(max(y + h, 0), img_h)
    if x1 <= x0 or y1 <= y0:
        return image_bgr

    # 1. Fast path: plain white background.
    # Slice assignment covers exactly the same pixels the inpainting path
    # works on (cv2.rectangle treats its far corner as inclusive, which
    # painted one extra row and column).
    if bg_type == "white":
        image_bgr[y0:y1, x0:x1] = 255
        return image_bgr

    # 2. Complex path: inpainting for screentones/artwork.
    roi = image_bgr[y0:y1, x0:x1]
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    # Pixels darker than 120 become white in the mask (treated as text),
    # everything else becomes black.
    _, mask = cv2.threshold(gray, 120, 255, cv2.THRESH_BINARY_INV)
    # Grow the mask slightly so the edges of the letters are covered too.
    kernel = np.ones((3, 3), np.uint8)
    mask = cv2.dilate(mask, kernel, iterations=1)
    # Reconstruct the masked pixels from their surroundings.
    inpainted_roi = cv2.inpaint(roi, mask, inpaintRadius=3, flags=cv2.INPAINT_TELEA)
    # Paste the cleaned region back into the main image.
    image_bgr[y0:y1, x0:x1] = inpainted_roi
    return image_bgr
 def main(argv=None):
    """
    Command-line entry point: erase the text of every bubble listed in a
    bubbles.json file from a manga page and write the cleaned image.

    Parameters
    ----------
    argv : list of str, optional
        Arguments to parse; ``None`` makes argparse read ``sys.argv[1:]``.

    Returns
    -------
    int
        0 when the cleaned image was written, 1 on any failure. The value is
        used as the process exit status so that calling scripts can detect
        failures.
    """
    import sys  # local import: only needed for stderr diagnostics

    parser = argparse.ArgumentParser(description="Clean manga text using bubbles.json")
    parser.add_argument("-i", "--image", required=True, help="Path to the original manga image")
    parser.add_argument("-j", "--json", required=True, help="Path to the bubbles.json file")
    parser.add_argument("-o", "--output", help="Path to save the cleaned image (optional)")
    args = parser.parse_args(argv)

    if not os.path.exists(args.image):
        print(f"❌ Error: Image file not found at {args.image}", file=sys.stderr)
        return 1
    if not os.path.exists(args.json):
        print(f"❌ Error: JSON file not found at {args.json}", file=sys.stderr)
        return 1

    # Load the image. cv2.imread signals an unreadable file by returning
    # None rather than raising, so that has to be checked explicitly.
    print(f"📂 Loading image: {args.image}")
    image = cv2.imread(args.image)
    if image is None:
        print(f"❌ Error: Could not read image data from {args.image}", file=sys.stderr)
        return 1

    # Load the JSON data
    print(f"📂 Loading JSON: {args.json}")
    try:
        with open(args.json, "r", encoding="utf-8") as f:
            bubbles_data = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers both malformed JSON and undecodable bytes.
        print(f"❌ Error: Could not load JSON from {args.json}: {exc}", file=sys.stderr)
        return 1
    if not isinstance(bubbles_data, dict):
        print(f"❌ Error: Expected a JSON object at the top level of {args.json}", file=sys.stderr)
        return 1

    # Process each box
    print("🧹 Cleaning text from bounding boxes...")
    white_count = 0
    complex_count = 0
    for data in bubbles_data.values():
        # Entries without a "box" (or that are not objects) carry nothing to clean.
        if not isinstance(data, dict) or "box" not in data:
            continue
        # Default to "complex" if the flag is missing for backward compatibility
        bg_type = data.get("background_type", "complex")
        if bg_type == "white":
            white_count += 1
        else:
            complex_count += 1
        image = clean_text_from_box(image, data["box"], bg_type)
    print(f"   ✓ Cleaned {white_count} white boxes (fast fill)")
    print(f"   ✓ Cleaned {complex_count} complex boxes (inpainting)")

    # Determine output path: explicit -o, else "<image stem>_cleaned<ext>"
    if args.output:
        out_path = args.output
    else:
        base_name, ext = os.path.splitext(args.image)
        out_path = f"{base_name}_cleaned{ext}"

    # Save the result; cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(out_path, image):
        print(f"❌ Error: Could not write cleaned image to {out_path}", file=sys.stderr)
        return 1
    print(f"✅ Cleaned image saved successfully to: {out_path}")
    return 0
 if __name__ == "__main__":
    raise SystemExit(main())
--- a/manga-translator.py
+++ b/manga-translator.py
@@ -1,11 +1,12 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
-
+import cv2
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
 import textwrap
 import os
 import re
 import json
 import cv2
 import numpy as np
 import warnings
 from typing import List, Tuple, Dict, Any, Optional
@@ -519,6 +520,36 @@ def build_region_flags(raw_text, corrected_text, region_type, conf):
 # ============================================================
 # HELPERS
 # ============================================================
 def inpaint_text_only(image_bgr, box_xywh):
    """
    Erase the original text inside a bounding box by inpainting, leaving the
    background intact and drawing no new text. ``image_bgr`` is modified in
    place.

    Parameters
    ----------
    image_bgr : numpy.ndarray
        Image array indexed as [row, column, ...]; converted with
        ``cv2.COLOR_BGR2GRAY`` to find the dark pixels.
    box_xywh : mapping
        Must provide "x", "y", "w" and "h" (top-left corner and size in
        pixels); each value is converted with ``int()``.

    Returns
    -------
    numpy.ndarray
        The same ``image_bgr`` object. It is returned unchanged when the box
        does not overlap the image.
    """
    img_h, img_w = image_bgr.shape[:2]
    x, y = int(box_xywh["x"]), int(box_xywh["y"])
    w, h = int(box_xywh["w"]), int(box_xywh["h"])

    # Clip the box to the image. Unclipped negative coordinates would be
    # read by NumPy as offsets from the far edge and select the wrong area.
    x0 = min(max(x, 0), img_w)
    y0 = min(max(y, 0), img_h)
    x1 = min(max(x + w, 0), img_w)
    y1 = min(max(y + h, 0), img_h)
    if x1 <= x0 or y1 <= y0:
        return image_bgr

    # 1. Extract the Region of Interest (ROI)
    roi = image_bgr[y0:y1, x0:x1]
    # 2. Create a mask for the dark text
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    # Pixels darker than 100 become white in the mask, others become black
    _, mask = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY_INV)
    # Dilate the mask slightly so the edges of the letters are fully covered
    kernel = np.ones((3, 3), np.uint8)
    mask = cv2.dilate(mask, kernel, iterations=1)
    # 3. Inpaint the background to erase the text
    inpainted_roi = cv2.inpaint(roi, mask, inpaintRadius=3, flags=cv2.INPAINT_TELEA)
    # Put the erased background back into the main image
    image_bgr[y0:y1, x0:x1] = inpainted_roi
    return image_bgr
 def split_boxes_by_horizontal_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=1.5):
    """
    Splits a single bounding box into multiple boxes if there is a large horizontal 
@@ -2749,6 +2780,14 @@ def process_manga_page(image_path: str,
        # Bubble groups (lines as rendered in the bubble)
        bubble_groups = build_text_from_layout(indices, ocr)
        # ── Step 12.5: Detect Background Complexity ───────────
        # Analyze the pixels to see if it's a plain white bubble or complex artwork
        feats = contour_features_for_box(image_bgr, adjusted_box_xyxy)
        if feats["whiteness_ratio"] > 0.75 and feats["edge_density"] < 0.10:
            bg_type = "white"
        else:
            bg_type = "complex"
        # ── Step 13: Translate ────────────────────────────────
        translated        = ""
        translation_input = corrected_text
@@ -2779,6 +2818,7 @@ def process_manga_page(image_path: str,
        results[str(bid)] = {
            "order":             order_idx,
            "region_type":       region_type,
            "background_type":   bg_type,              # <--- NEW FLAG ADDED HERE
            "confidence":        round(conf, 4),
            "ocr_source":        ocr_source,
            "raw_ocr":           raw_text,
@@ -2787,7 +2827,7 @@ def process_manga_page(image_path: str,
            "translated":        translated,
            "flags":             flags,
            "bubble_groups":     bubble_groups,
-            "box":               xyxy_to_xywh(adjusted_box_xyxy), # <--- Uses the adjusted box
+            "box":               xyxy_to_xywh(adjusted_box_xyxy),
            "lines":             bubble_groups,
        }
@@ -2801,6 +2841,7 @@ def process_manga_page(image_path: str,
        _write_txt_output(results, output_txt)
    return results
 # ============================================================
 # OUTPUT WRITERS
 # ============================================================