Added all

This commit is contained in:
Guillem Hernandez Sola
2026-04-23 19:11:02 +02:00
parent 37bdc25bf6
commit fd0339d8ca
3 changed files with 382 additions and 4 deletions

228
batch-clean.sh Normal file
View File

@@ -0,0 +1,228 @@
#!/usr/bin/env bash
# ============================================================
# batch-clean.sh
# Batch manga text removal (inpainting) using bubbles.json
#
# Usage:
# ./batch-clean.sh <folder>
# ./batch-clean.sh <folder> --start 3 --end 7
#
# Output per page lands in:
# <folder>/translated/<page_stem>/
# └── <page_stem>_cleaned.png
# ============================================================
# Shell options: -u makes expanding an unset variable an error and pipefail
# makes a pipeline report failure if any stage fails. errexit (-e) is not
# enabled, so a failing command does not abort the script by itself.
set -uo pipefail
# ─────────────────────────────────────────────────────────────
# CONFIGURATION
# ─────────────────────────────────────────────────────────────
# First page to process (1-based position in the sorted image list);
# overridden by --start.
START_PAGE=1
# Last page to process; the large default stands for "all pages" and is
# overridden by --end.
END_PAGE=999999
# Interpreter used to run the cleaner; overridden by --python.
PYTHON_BIN="python"
# Absolute path of the directory that contains this script.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# The Python cleaner is expected to live next to this script.
CLEANER="${SCRIPT_DIR}/clean_bubbles.py"
# ─────────────────────────────────────────────────────────────
# COLOURS
# ─────────────────────────────────────────────────────────────
# ANSI escape sequences kept as literal backslash text (single quotes);
# they only turn into real escapes when printed through `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
BOLD='\033[1m'
# Resets colour/weight back to the terminal default.
RESET='\033[0m'
# ─────────────────────────────────────────────────────────────
# HELPERS
# ─────────────────────────────────────────────────────────────
#######################################
# Print the command-line help text.
# Globals:   BOLD, RESET (read)
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  printf '%s\n' ""
  echo -e "${BOLD}Usage:${RESET}"
  # Plain lines go through printf '%s' so they are emitted verbatim.
  printf '%s\n' \
    " $0 <folder> [options]" \
    ""
  echo -e "${BOLD}Options:${RESET}"
  printf '%s\n' \
    " --start First page number (default: 1)" \
    " --end Last page number (default: all)" \
    " --python Python binary (default: python)" \
    " --help, -h Show this help" \
    ""
  echo -e "${BOLD}Examples:${RESET}"
  printf '%s\n' \
    " $0 pages-for-tests" \
    " $0 pages-for-tests --start 3 --end 7" \
    ""
}
#######################################
# Shared backend for the log_* helpers: prints a message wrapped in a
# colour prefix and the RESET sequence.
# Globals:   RESET (read)
# Arguments: $1 - prefix (colour code plus any leading marker text)
#            $2.. - message words, joined the way "$*" joins them
# Outputs:   one line on stdout, escapes interpreted by echo -e
#######################################
_log_with_prefix() {
  local prefix="$1"
  shift
  echo -e "${prefix}$*${RESET}"
}

# Informational message (cyan).
log_info() {
  _log_with_prefix "${CYAN} " "$@"
}

# Success message (green).
log_ok() {
  _log_with_prefix "${GREEN}" "$@"
}

# Warning message (yellow, with warning marker).
log_warn() {
  _log_with_prefix "${YELLOW}⚠️ " "$@"
}

# Error message (red).
log_error() {
  _log_with_prefix "${RED}" "$@"
}
#######################################
# Print a three-line banner: rule, title, rule, preceded by a blank line.
# Globals:   BOLD, CYAN, RESET (read)
# Arguments: $@ - title text
# Outputs:   banner on stdout
#######################################
log_section() {
  local rule='══════════════════════════════════════════'
  local tint="${BOLD}${CYAN}"
  # The leading \n yields the blank line above the banner.
  echo -e "\n${tint}${rule}${RESET}"
  echo -e "${tint} 🧹 $*${RESET}"
  echo -e "${tint}${rule}${RESET}"
}
# ─────────────────────────────────────────────────────────────
# ARGUMENT PARSING
# ─────────────────────────────────────────────────────────────
# The first positional argument is the folder; everything after it is an
# option. Results land in FOLDER, START_PAGE, END_PAGE and PYTHON_BIN.
if [[ $# -eq 0 ]]; then
  log_error "No folder specified."
  usage
  exit 1
fi

# Honour a help request even when it is the only argument; otherwise
# "--help" would be taken as the folder name.
case "$1" in
  --help|-h)
    usage
    exit 0
    ;;
esac

FOLDER="$1"
shift

while [[ $# -gt 0 ]]; do
  case "$1" in
    --start|--end|--python)
      # Under `set -u` a missing value would abort with a bare
      # "unbound variable" message, so check before touching $2.
      if [[ $# -lt 2 ]]; then
        log_error "Option $1 requires a value."
        usage
        exit 1
      fi
      case "$1" in
        --start)  START_PAGE="$2" ;;
        --end)    END_PAGE="$2" ;;
        --python) PYTHON_BIN="$2" ;;
      esac
      shift 2
      ;;
    --help|-h)
      usage
      exit 0
      ;;
    *)
      log_error "Unknown option: $1"
      usage
      exit 1
      ;;
  esac
done

# The page bounds are later compared with arithmetic operators, which
# evaluate their operands as expressions. Accept plain decimal digits only.
if [[ ! "$START_PAGE" =~ ^[0-9]+$ ]]; then
  log_error "--start must be a whole number (got: ${START_PAGE})"
  exit 1
fi
if [[ ! "$END_PAGE" =~ ^[0-9]+$ ]]; then
  log_error "--end must be a whole number (got: ${END_PAGE})"
  exit 1
fi

# Force base 10 so values such as "08" are not rejected as invalid octal.
START_PAGE=$(( 10#$START_PAGE ))
END_PAGE=$(( 10#$END_PAGE ))

# Pages are numbered from 1; a smaller start would mislabel every page.
if (( START_PAGE < 1 )); then
  log_error "--start must be 1 or greater (got: ${START_PAGE})"
  exit 1
fi
# ─────────────────────────────────────────────────────────────
# VALIDATION
# ─────────────────────────────────────────────────────────────
# Pre-flight checks, written as guard clauses: each one reports the
# problem and stops the script with status 1.

# The input folder must exist.
[[ -d "$FOLDER" ]] || {
  log_error "Folder not found: $FOLDER"
  exit 1
}

# The Python cleaner must be present at the configured path.
[[ -f "$CLEANER" ]] || {
  log_error "clean_bubbles.py not found at: $CLEANER"
  exit 1
}

# The chosen interpreter must be resolvable as a command.
command -v "$PYTHON_BIN" &>/dev/null || {
  log_error "Python binary not found: $PYTHON_BIN"
  log_error "Try --python python3"
  exit 1
}
# ─────────────────────────────────────────────────────────────
# DISCOVER IMAGES
# ─────────────────────────────────────────────────────────────
# Collect the image files that sit directly inside FOLDER (no recursion),
# sorted by path. NUL delimiters keep names with spaces or newlines intact.
ALL_IMAGES=()
find_args=(
  "$FOLDER" -maxdepth 1 -type f
  '(' -iname '*.jpg' -o -iname '*.jpeg' -o -iname '*.png' -o -iname '*.webp' ')'
  -print0
)
while IFS= read -r -d '' candidate; do
  ALL_IMAGES[${#ALL_IMAGES[@]}]="$candidate"
done < <(find "${find_args[@]}" | sort -z)

TOTAL=${#ALL_IMAGES[@]}
# Nothing to do without at least one image.
if (( TOTAL == 0 )); then
  log_error "No image files found in: $FOLDER"
  exit 1
fi
# ─────────────────────────────────────────────────────────────
# SLICE TO REQUESTED PAGE RANGE
# ─────────────────────────────────────────────────────────────
# Keep only the images whose 1-based position lies within
# [START_PAGE, END_PAGE].
PAGES=()
for (( i = 0; i < ${#ALL_IMAGES[@]}; i++ )); do
  PAGE_NUM=$(( i + 1 ))
  # Skip anything outside the requested window.
  if [[ $PAGE_NUM -lt $START_PAGE || $PAGE_NUM -gt $END_PAGE ]]; then
    continue
  fi
  PAGES+=("${ALL_IMAGES[i]}")
done

# An empty selection means the requested range does not overlap the images.
(( ${#PAGES[@]} > 0 )) || {
  log_error "No pages in range [${START_PAGE}, ${END_PAGE}] (total: ${TOTAL})"
  exit 1
}
# ─────────────────────────────────────────────────────────────
# SUMMARY HEADER
# ─────────────────────────────────────────────────────────────
# Banner shown once before processing: resolved folder, how many pages
# were selected out of the total, and the requested page range.
log_section "BATCH MANGA CLEANER"
log_info "📂 Folder : $(realpath "$FOLDER")"
log_info "📄 Pages : ${#PAGES[@]} of ${TOTAL} total"
# Keep a separator between the bounds; without one, start 1 and end 7
# would print as "17".
log_info "🔢 Range : ${START_PAGE} → ${END_PAGE}"
echo ""
# ─────────────────────────────────────────────────────────────
# PROCESS EACH PAGE
# ─────────────────────────────────────────────────────────────
# Counters for the final summary, plus the list of failed pages.
PASS=0
FAIL=0
FAIL_LIST=()

#######################################
# Count one failed page and remember its label.
# Globals:   FAIL, FAIL_LIST (written)
# Arguments: $1 - label shown in the final summary
#######################################
record_failure() {
  FAIL=$(( FAIL + 1 ))
  FAIL_LIST+=("$1")
}

for i in "${!PAGES[@]}"; do
  IMAGE="${PAGES[$i]}"
  PAGE_NUM=$(( START_PAGE + i ))
  # File name without directory and without its last extension.
  STEM="$(basename "${IMAGE%.*}")"
  WORKDIR="${FOLDER}/translated/${STEM}"
  OUTPUT_JSON="${WORKDIR}/bubbles.json"
  OUTPUT_CLEANED="${WORKDIR}/${STEM}_cleaned.png"
  page_rule="──────────────────────────────────────────"

  echo ""
  echo -e "${BOLD}${page_rule}${RESET}"
  echo -e "${BOLD} 🖼️ [${PAGE_NUM}/${TOTAL}] ${STEM}${RESET}"
  echo -e "${BOLD}${page_rule}${RESET}"

  # A page without bubbles.json cannot be cleaned; count it as failed.
  if [[ ! -f "$OUTPUT_JSON" ]]; then
    log_warn "Skipping: bubbles.json not found in ${WORKDIR}"
    record_failure "${STEM} (No JSON)"
    continue
  fi

  log_info "🗂️ Image : $(basename "$IMAGE")"
  log_info "🧹 Cleaning text..."

  # ── Run the cleaner ───────────────────────────────────────
  if ! "$PYTHON_BIN" "$CLEANER" -i "$IMAGE" -j "$OUTPUT_JSON" -o "$OUTPUT_CLEANED"; then
    log_error "Page ${PAGE_NUM} FAILED — check output above."
    record_failure "${STEM} (Script Error)"
    continue
  fi

  # A zero exit status alone is not trusted: the output file must exist.
  if [[ ! -f "$OUTPUT_CLEANED" ]]; then
    log_error "Script ran but output image is missing."
    record_failure "${STEM} (Missing Output)"
    continue
  fi

  log_ok "Cleaned image saved → ${STEM}_cleaned.png"
  PASS=$(( PASS + 1 ))
done
# ─────────────────────────────────────────────────────────────
# FINAL SUMMARY
# ─────────────────────────────────────────────────────────────
# Closing report: pass/fail counts, the failed pages (if any) and the
# output location. Exit status is 0 only when no page failed.
log_section "BATCH CLEANING COMPLETE"
echo -e "${GREEN}Passed : ${PASS}${RESET}"
echo -e "${RED}Failed : ${FAIL}${RESET}"

if (( ${#FAIL_LIST[@]} > 0 )); then
  printf '%s\n' ""
  log_warn "Failed pages:"
  for failed_page in "${FAIL_LIST[@]}"; do
    echo -e "${RED}${failed_page}${RESET}"
  done
fi

printf '%s\n' ""
log_info "📦 Output folder: $(realpath "${FOLDER}/translated")"
printf '%s\n' ""

if (( FAIL == 0 )); then
  exit 0
fi
exit 1

109
clean_bubbles.py Normal file
View File

@@ -0,0 +1,109 @@
import cv2
import numpy as np
import json
import os
import argparse
def clean_text_from_box(image_bgr, box, bg_type="complex", dark_threshold=120):
    """
    Erase the text inside one bounding box, modifying ``image_bgr`` in place.

    Args:
        image_bgr: Image array indexed as [row, column, ...]; its first two
            dimensions are taken as height and width.
        box: Mapping with "x", "y", "w", "h" entries (converted with int()).
        bg_type: "white" fills the box with solid white; any other value
            inpaints the dark pixels found inside the box.
        dark_threshold: Grey level used to build the text mask on the
            inpainting path; pixels at or below it are treated as text.

    Returns:
        The same ``image_bgr`` object. It is returned untouched when the box
        does not overlap the image.
    """
    img_h, img_w = image_bgr.shape[:2]

    # Work with corner coordinates so that clipping one edge never moves the
    # opposite one: a box starting at x=-5 with w=10 must end at x=5, not 10.
    left = int(box["x"])
    top = int(box["y"])
    right = left + int(box["w"])
    bottom = top + int(box["h"])

    left, top = max(0, left), max(0, top)
    right, bottom = min(img_w, right), min(img_h, bottom)

    # Box lies entirely outside the image, or has no area.
    if right <= left or bottom <= top:
        return image_bgr

    # 1. Fast path: plain white background. Slice assignment covers exactly
    #    the w x h box, the same region the inpainting path operates on.
    if bg_type == "white":
        image_bgr[top:bottom, left:right] = 255
        return image_bgr

    # 2. Complex path: inpainting for screentones/artwork.
    roi = image_bgr[top:bottom, left:right]

    # Mask of the dark text: inverted threshold, so dark pixels become 255.
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(gray, dark_threshold, 255, cv2.THRESH_BINARY_INV)

    # Grow the mask by one 3x3 dilation so glyph edges are covered too.
    kernel = np.ones((3, 3), np.uint8)
    mask = cv2.dilate(mask, kernel, iterations=1)

    # Fill the masked pixels from their surroundings.
    inpainted_roi = cv2.inpaint(roi, mask, inpaintRadius=3, flags=cv2.INPAINT_TELEA)

    # Paste the cleaned region back into the main image.
    image_bgr[top:bottom, left:right] = inpainted_roi
    return image_bgr
def main():
    """
    Command-line entry point: erase the text of every box listed in a
    bubbles.json file from an image and save the result.

    Arguments (parsed from the command line):
        -i / --image   path to the original image (required)
        -j / --json    path to the bubbles.json file (required)
        -o / --output  where to save the result; defaults to the input path
                       with "_cleaned" inserted before the extension

    Raises:
        SystemExit(1): on a missing input file, an image that cannot be
            decoded, JSON whose top level is not an object, or a failed
            write. The non-zero status lets a calling script detect it.
    """
    parser = argparse.ArgumentParser(description="Clean manga text using bubbles.json")
    parser.add_argument("-i", "--image", required=True, help="Path to the original manga image")
    parser.add_argument("-j", "--json", required=True, help="Path to the bubbles.json file")
    parser.add_argument("-o", "--output", help="Path to save the cleaned image (optional)")
    args = parser.parse_args()

    if not os.path.exists(args.image):
        print(f"❌ Error: Image file not found at {args.image}")
        raise SystemExit(1)
    if not os.path.exists(args.json):
        print(f"❌ Error: JSON file not found at {args.json}")
        raise SystemExit(1)

    # Load the image. cv2.imread signals an unreadable file by returning
    # None instead of raising, so that has to be checked explicitly.
    print(f"📂 Loading image: {args.image}")
    image = cv2.imread(args.image)
    if image is None:
        print(f"❌ Error: Could not read image data from {args.image}")
        raise SystemExit(1)

    # Load the JSON data
    print(f"📂 Loading JSON: {args.json}")
    with open(args.json, "r", encoding="utf-8") as f:
        bubbles_data = json.load(f)
    if not isinstance(bubbles_data, dict):
        print(f"❌ Error: Expected a JSON object at the top level of {args.json}")
        raise SystemExit(1)

    # Process each box
    print("🧹 Cleaning text from bounding boxes...")
    white_count = 0
    complex_count = 0
    for data in bubbles_data.values():
        # Entries that are not objects or carry no "box" are skipped.
        if not isinstance(data, dict) or "box" not in data:
            continue
        # Default to "complex" if the flag is missing for backward compatibility
        bg_type = data.get("background_type", "complex")
        if bg_type == "white":
            white_count += 1
        else:
            complex_count += 1
        image = clean_text_from_box(image, data["box"], bg_type)

    print(f" ✓ Cleaned {white_count} white boxes (fast fill)")
    print(f" ✓ Cleaned {complex_count} complex boxes (inpainting)")

    # Determine output path
    if args.output:
        out_path = args.output
    else:
        base_name, ext = os.path.splitext(args.image)
        out_path = f"{base_name}_cleaned{ext}"

    # Make sure the destination directory exists before writing.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Save the result. cv2.imwrite reports failure through its return
    # value, so success is only announced when it returns a true value.
    if not cv2.imwrite(out_path, image):
        print(f"❌ Error: Could not write cleaned image to {out_path}")
        raise SystemExit(1)
    print(f"✅ Cleaned image saved successfully to: {out_path}")


if __name__ == "__main__":
    main()

View File

@@ -1,11 +1,12 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import textwrap
import os import os
import re import re
import json import json
import cv2
import numpy as np
import warnings import warnings
from typing import List, Tuple, Dict, Any, Optional from typing import List, Tuple, Dict, Any, Optional
@@ -519,6 +520,36 @@ def build_region_flags(raw_text, corrected_text, region_type, conf):
# ============================================================ # ============================================================
# HELPERS # HELPERS
# ============================================================ # ============================================================
def inpaint_text_only(image_bgr, box_xywh):
    """
    Erase the original text inside a bounding box using inpainting,
    leaving the background intact and drawing no new text.

    Args:
        image_bgr: Image array indexed as [row, column, ...]; modified in place.
        box_xywh: Mapping with "x", "y", "w", "h" entries (converted with int()).

    Returns:
        The same ``image_bgr`` object. It is returned untouched when the box
        does not overlap the image.
    """
    img_h, img_w = image_bgr.shape[:2]

    # Clip the box to the image. Without this, a negative x or y would be
    # read as a from-the-end slice index and select the wrong region.
    left = max(0, int(box_xywh["x"]))
    top = max(0, int(box_xywh["y"]))
    right = min(img_w, int(box_xywh["x"]) + int(box_xywh["w"]))
    bottom = min(img_h, int(box_xywh["y"]) + int(box_xywh["h"]))
    if right <= left or bottom <= top:
        return image_bgr

    # 1. Extract the Region of Interest (ROI)
    roi = image_bgr[top:bottom, left:right]
    if roi.size == 0:
        return image_bgr

    # 2. Create a mask for the dark text: inverted threshold at 100, so
    #    darker pixels become 255 in the mask and the rest 0.
    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    _, mask = cv2.threshold(gray, 100, 255, cv2.THRESH_BINARY_INV)

    # Grow the mask by one 3x3 dilation so glyph edges are covered too.
    kernel = np.ones((3, 3), np.uint8)
    mask = cv2.dilate(mask, kernel, iterations=1)

    # 3. Inpaint the background to erase the text
    inpainted_roi = cv2.inpaint(roi, mask, inpaintRadius=3, flags=cv2.INPAINT_TELEA)

    # Put the erased background back into the main image
    image_bgr[top:bottom, left:right] = inpainted_roi
    return image_bgr
def split_boxes_by_horizontal_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=1.5): def split_boxes_by_horizontal_gap(out_boxes, out_indices, out_quads, out_lines, ocr, gap_multiplier=1.5):
""" """
Splits a single bounding box into multiple boxes if there is a large horizontal Splits a single bounding box into multiple boxes if there is a large horizontal
@@ -2749,6 +2780,14 @@ def process_manga_page(image_path: str,
# Bubble groups (lines as rendered in the bubble) # Bubble groups (lines as rendered in the bubble)
bubble_groups = build_text_from_layout(indices, ocr) bubble_groups = build_text_from_layout(indices, ocr)
# ── Step 12.5: Detect Background Complexity ───────────
# Analyze the pixels to see if it's a plain white bubble or complex artwork
feats = contour_features_for_box(image_bgr, adjusted_box_xyxy)
if feats["whiteness_ratio"] > 0.75 and feats["edge_density"] < 0.10:
bg_type = "white"
else:
bg_type = "complex"
# ── Step 13: Translate ──────────────────────────────── # ── Step 13: Translate ────────────────────────────────
translated = "" translated = ""
translation_input = corrected_text translation_input = corrected_text
@@ -2779,6 +2818,7 @@ def process_manga_page(image_path: str,
results[str(bid)] = { results[str(bid)] = {
"order": order_idx, "order": order_idx,
"region_type": region_type, "region_type": region_type,
"background_type": bg_type, # <--- NEW FLAG ADDED HERE
"confidence": round(conf, 4), "confidence": round(conf, 4),
"ocr_source": ocr_source, "ocr_source": ocr_source,
"raw_ocr": raw_text, "raw_ocr": raw_text,
@@ -2787,7 +2827,7 @@ def process_manga_page(image_path: str,
"translated": translated, "translated": translated,
"flags": flags, "flags": flags,
"bubble_groups": bubble_groups, "bubble_groups": bubble_groups,
"box": xyxy_to_xywh(adjusted_box_xyxy), # <--- Uses the adjusted box "box": xyxy_to_xywh(adjusted_box_xyxy),
"lines": bubble_groups, "lines": bubble_groups,
} }
@@ -2801,6 +2841,7 @@ def process_manga_page(image_path: str,
_write_txt_output(results, output_txt) _write_txt_output(results, output_txt)
return results return results
# ============================================================ # ============================================================
# OUTPUT WRITERS # OUTPUT WRITERS
# ============================================================ # ============================================================