Added all

2026-04-23 19:28:39 +02:00
parent fd0339d8ca
commit 832d699917
3 changed files with 312 additions and 137 deletions
--- a/batch-clean.sh
+++ b/batch-clean.sh
--- a/batch-renderer.sh
+++ b/batch-renderer.sh
@@ -0,0 +1,240 @@
+#!/usr/bin/env bash
+# ============================================================
+# batch-renderer.sh
+# Batch manga text rendering using cleaned images and output.txt
+#
+# Usage:
+#   ./batch-renderer.sh <folder>
+#   ./batch-renderer.sh <folder> --start 3 --end 7
+#
+# Output per page lands in:
+#   <folder>/translated/<page_stem>/
+#     └── <page_stem>_translated.png
+# ============================================================
+
+set -uo pipefail
+
+# ─────────────────────────────────────────────────────────────
+# CONFIGURATION
+# ─────────────────────────────────────────────────────────────
+START_PAGE=1
+END_PAGE=999999
+PYTHON_BIN="python"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+RENDERER="${SCRIPT_DIR}/manga-renderer.py"
+
+# ─────────────────────────────────────────────────────────────
+# COLOURS
+# ─────────────────────────────────────────────────────────────
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+CYAN='\033[0;36m'
+BOLD='\033[1m'
+RESET='\033[0m'
+
+# ─────────────────────────────────────────────────────────────
+# HELPERS
+# ─────────────────────────────────────────────────────────────
+usage() {
+    # Prints the synopsis, the option table and two examples to stdout.
+    # Headings are bold; "$0" is the name the script was invoked with.
+    printf '\n%b\n' "${BOLD}Usage:${RESET}"
+    printf '  %s <folder> [options]\n\n' "$0"
+    printf '%b\n' "${BOLD}Options:${RESET}"
+    printf '%s\n' \
+        "  --start         First page number     (default: 1)" \
+        "  --end           Last  page number     (default: all)" \
+        "  --python        Python binary         (default: python)" \
+        "  --help,    -h   Show this help" \
+        ""
+    printf '%b\n' "${BOLD}Examples:${RESET}"
+    printf '  %s pages-for-tests\n' "$0"
+    printf '  %s pages-for-tests --start 3 --end 7\n\n' "$0"
+}
+
+# One-line loggers: colour + icon + all arguments, then a colour reset.
+log_info()    { printf '%b\n' "${CYAN}ℹ️  $*${RESET}"; }
+log_ok()      { printf '%b\n' "${GREEN}✅  $*${RESET}"; }
+log_warn()    { printf '%b\n' "${YELLOW}⚠️  $*${RESET}"; }
+log_error()   { printf '%b\n' "${RED}❌  $*${RESET}"; }
+log_section() {   # blank line, then the title framed by two horizontal rules
+    local rule="${BOLD}${CYAN}══════════════════════════════════════════${RESET}"
+    printf '\n%b\n%b\n%b\n' "$rule" "${BOLD}${CYAN}  🔤  $*${RESET}" "$rule"
+}
+
+# ─────────────────────────────────────────────────────────────
+# ARGUMENT PARSING
+# ─────────────────────────────────────────────────────────────
+if [[ $# -eq 0 ]]; then
+    log_error "No folder specified."
+    usage
+    exit 1
+fi
+
+FOLDER="$1"
+shift
+
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --start)      START_PAGE="$2";  shift 2 ;;
+        --end)        END_PAGE="$2";    shift 2 ;;
+        --python)     PYTHON_BIN="$2";  shift 2 ;;
+        --help|-h)    usage; exit 0 ;;
+        *)
+            log_error "Unknown option: $1"
+            usage
+            exit 1
+            ;;
+    esac
+done
+
+# ─────────────────────────────────────────────────────────────
+# VALIDATION
+# ─────────────────────────────────────────────────────────────
+if [[ ! -d "$FOLDER" ]]; then
+    log_error "Folder not found: $FOLDER"
+    exit 1
+fi
+
+if [[ ! -f "$RENDERER" ]]; then
+    log_error "manga-renderer.py not found at: $RENDERER"
+    exit 1
+fi
+
+if ! command -v "$PYTHON_BIN" &>/dev/null; then
+    log_error "Python binary not found: $PYTHON_BIN"
+    log_error "Try --python python3"
+    exit 1
+fi
+
+# ─────────────────────────────────────────────────────────────
+# DISCOVER IMAGES
+# ─────────────────────────────────────────────────────────────
+ALL_IMAGES=()
+while IFS= read -r -d '' img; do
+    ALL_IMAGES+=("$img")
+done < <(
+    find "$FOLDER" -maxdepth 1 -type f \
+        \( -iname "*.jpg" -o -iname "*.jpeg" \
+           -o -iname "*.png" -o -iname "*.webp" \) \
+        -print0 | sort -z
+)
+
+TOTAL=${#ALL_IMAGES[@]}
+
+if [[ $TOTAL -eq 0 ]]; then
+    log_error "No image files found in: $FOLDER"
+    exit 1
+fi
+
+# ─────────────────────────────────────────────────────────────
+# SLICE TO REQUESTED PAGE RANGE
+# ─────────────────────────────────────────────────────────────
+# Keep only the images whose 1-based position in ALL_IMAGES lies inside
+# the inclusive window [START_PAGE, END_PAGE].
+PAGES=()
+for (( idx = 0; idx < ${#ALL_IMAGES[@]}; idx++ )); do
+    (( idx + 1 >= START_PAGE && idx + 1 <= END_PAGE )) || continue
+    PAGES+=("${ALL_IMAGES[idx]}")
+done
+
+if (( ${#PAGES[@]} == 0 )); then
+    log_error "No pages in range [${START_PAGE}, ${END_PAGE}] (total: ${TOTAL})"
+    exit 1
+fi
+
+# ─────────────────────────────────────────────────────────────
+# SUMMARY HEADER
+# ─────────────────────────────────────────────────────────────
+log_section "BATCH MANGA RENDERER"
+log_info "📂  Folder   : $(realpath "$FOLDER")"   # resolved path, for display only
+log_info "📄  Pages    : ${#PAGES[@]} of ${TOTAL} total"   # selected vs. discovered images
+log_info "🔢  Range    : ${START_PAGE} → ${END_PAGE}"   # printed as requested, not clamped to TOTAL
+echo ""
+
+# ─────────────────────────────────────────────────────────────
+# PROCESS EACH PAGE
+# ─────────────────────────────────────────────────────────────
+PASS=0          # pages rendered successfully
+FAIL=0          # pages skipped or failed
+FAIL_LIST=()    # "<stem> (<reason>)" entries, printed in the final summary
+
+for i in "${!PAGES[@]}"; do
+    IMAGE="${PAGES[$i]}"
+    PAGE_NUM=$(( START_PAGE + i ))   # PAGES[0] corresponds to page START_PAGE
+    STEM="$(basename "${IMAGE%.*}")"
+    WORKDIR="${FOLDER}/translated/${STEM}"
+    echo ""
+    echo -e "${BOLD}──────────────────────────────────────────${RESET}"
+    echo -e "${BOLD}  🖼️  [${PAGE_NUM}/${TOTAL}]  ${STEM}${RESET}"
+    echo -e "${BOLD}──────────────────────────────────────────${RESET}"
+
+    INPUT_CLEANED="${WORKDIR}/${STEM}_cleaned.png"
+    INPUT_JSON="${WORKDIR}/bubbles.json"
+    INPUT_TXT="${WORKDIR}/output.txt"
+    OUTPUT_RENDERED="${WORKDIR}/${STEM}_translated.png"
+
+    # Check for required files; report every missing one before skipping.
+    MISSING_FILES=0
+    for REQ_FILE in "$INPUT_CLEANED" "$INPUT_JSON" "$INPUT_TXT"; do
+        if [[ ! -f "$REQ_FILE" ]]; then
+            log_warn "Missing required file: $(basename "$REQ_FILE")"
+            MISSING_FILES=1
+        fi
+    done
+
+    if [[ $MISSING_FILES -eq 1 ]]; then
+        log_error "Skipping ${STEM} due to missing files. Did you run batch-clean.sh?"
+        FAIL=$(( FAIL + 1 ))
+        FAIL_LIST+=("${STEM} (Missing Files)")
+        continue
+    fi
+
+    log_info "🗂️  Cleaned Image : $(basename "$INPUT_CLEANED")"
+    log_info "🔤  Rendering translated text..."
+
+    # Remove any image left by a previous run: the existence check below must
+    # only succeed for output written by this invocation.
+    rm -f -- "$OUTPUT_RENDERED"
+    # ── Run the renderer; a zero exit status alone is not trusted ──
+    if "$PYTHON_BIN" "$RENDERER" \
+            -i "$INPUT_CLEANED" \
+            -j "$INPUT_JSON" \
+            -t "$INPUT_TXT" \
+            -o "$OUTPUT_RENDERED"; then
+        if [[ -f "$OUTPUT_RENDERED" ]]; then
+            log_ok "Translated image saved → ${STEM}_translated.png"
+            PASS=$(( PASS + 1 ))
+        else
+            log_error "Script ran but output image is missing."
+            FAIL=$(( FAIL + 1 ))
+            FAIL_LIST+=("${STEM} (Missing Output)")
+        fi
+    else
+        log_error "Page ${PAGE_NUM} FAILED — check output above."
+        FAIL=$(( FAIL + 1 ))
+        FAIL_LIST+=("${STEM} (Script Error)")
+    fi
+done
+
+# ─────────────────────────────────────────────────────────────
+# FINAL SUMMARY
+# ─────────────────────────────────────────────────────────────
+log_section "BATCH RENDERING COMPLETE"
+echo -e "  ✅  ${GREEN}Passed : ${PASS}${RESET}"
+echo -e "  ❌  ${RED}Failed : ${FAIL}${RESET}"
+
+if [[ ${#FAIL_LIST[@]} -gt 0 ]]; then
+    echo ""
+    log_warn "Failed pages:"
+    for NAME in "${FAIL_LIST[@]}"; do
+        echo -e "    ❌  ${RED}${NAME}${RESET}"
+    done
+fi
+
+echo ""
+log_info "📦  Output folder: $(realpath "${FOLDER}/translated")"
+echo ""
+
+[[ $FAIL -eq 0 ]] && exit 0 || exit 1
--- a/manga-renderer.py
+++ b/manga-renderer.py
@@ -3,49 +3,37 @@
 """
 manga-renderer.py

-Inputs:  001.jpg  +  bubbles.json  +  output_001.txt
-Output:  translated_page_001.png
-
-Strategy:
-  1. For every bubble, white-fill all its OCR quads (erases original text cleanly).
-  2. Detect the original font size from the OCR bounding boxes.
-  3. Dynamically wrap and scale down the translated text if it exceeds the bubble dimensions.
-  4. Render the translated text centered inside the bubble bounding box.
-  5. Uses uniform line heights (ascent + descent) to prevent Catalan accent collisions (È, À).
-  6. Adds a dynamic white stroke to the text to cover any residual original characters.
+Inputs:  16_cleaned.png + bubbles.json + output.txt
+Output:  16_translated.png
 """

 import json
 import textwrap
 import cv2
 import numpy as np
+import os
+import argparse
 from PIL import Image, ImageDraw, ImageFont
 from typing import Dict, List, Tuple, Optional, Set, Any

 # ============================================================
-# CONFIG  — edit these paths to match your setup
+# CONFIG
 # ============================================================
-IMAGE_PATH        = "004.png"
-BUBBLES_PATH      = "bubbles_004.json"
-TRANSLATIONS_PATH = "output_004.txt"
-OUTPUT_PATH       = "translated_page_004.png"
-
-# Font candidates — Prioritizes Laffayette for Catalan, with safe fallbacks
+# Bundled fonts first, then system fallbacks (macOS, Windows, Linux); if none load, resolve_font_path() falls back to Pillow's bitmap font
 FONT_CANDIDATES = [
    "fonts/animeace2_reg.ttf",
    "fonts/ComicNeue-Bold.ttf",
+    "/Library/Fonts/Arial.ttf",                 # macOS
+    "/System/Library/Fonts/Helvetica.ttc",      # macOS
+    "C:\\Windows\\Fonts\\arial.ttf",            # Windows
+    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf" # Linux
 ]

 DEFAULT_FONT_SIZE = 18
 MIN_FONT_SIZE     = 8
-QUAD_PAD          = 4    # extra pixels added around each quad before white-fill

-# ============================================================
-# SKIP LIST
-# ============================================================
-SKIP_BUBBLE_IDS: Set[int] = {
-    # Add any bubble IDs you do NOT want rendered here.
-}
+# Add any bubble IDs you do NOT want rendered here.
+SKIP_BUBBLE_IDS: Set[int] = set()

 # ============================================================
 # FONT LOADER
@@ -65,20 +53,17 @@ def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
 def resolve_font_path() -> str:
    """Return the path for the first working candidate."""
    for candidate in FONT_CANDIDATES:
-        if load_font(candidate, DEFAULT_FONT_SIZE) is not None:
-            print(f"   ✅ Font: {candidate}")
+        if os.path.exists(candidate) and load_font(candidate, DEFAULT_FONT_SIZE) is not None:
+            print(f"   ✅ Font loaded: {candidate}")
            return candidate
-    print("   ⚠️  No TrueType font found — using Pillow bitmap fallback")
+    print("   ⚠️  No TrueType font found — using Pillow bitmap fallback (Text may look small)")
    return ""

 # ============================================================
 # PARSERS
 # ============================================================
 def parse_translations(filepath: str) -> Dict[int, str]:
-    """
-    Reads output.txt and returns {bubble_id: translated_text}.
-    Lines look like:  #2|1|vision-base|ORIGINAL|TRANSLATED|FLAGS
-    """
+    """Reads output.txt and returns {bubble_id: translated_text}."""
    translations = {}
    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
@@ -86,11 +71,11 @@ def parse_translations(filepath: str) -> Dict[int, str]:
            if not line.startswith("#"):
                continue
            parts = line.split("|")
-            if len(parts) < 5:
+            if len(parts) < 9:
                continue
            try:
                bid        = int(parts[0].lstrip("#"))
-                translated = parts[4].strip()
+                translated = parts[8].strip() # Index 8 is TRANSLATED
                if translated and translated != "-":
                    translations[bid] = translated
            except ValueError:
@@ -98,70 +83,23 @@ def parse_translations(filepath: str) -> Dict[int, str]:
    return translations

 def parse_bubbles(filepath: str):
-    """Returns the full JSON data."""
    with open(filepath, "r", encoding="utf-8") as f:
-        data = json.load(f)
-    return data
-
-# ============================================================
-# ERASE  — white-fill every OCR quad (with small padding)
-# ============================================================
-def erase_quads(
-    image_bgr,
-    bubbles_data: Dict[str, dict],
-    translations: Dict[int, str],
-    skip_ids: Set[int],
-    pad: int = QUAD_PAD
-):
-    """
-    White-fills OCR quads ONLY for bubbles that:
-      - have a translation in output.txt  AND
-      - are NOT in SKIP_BUBBLE_IDS
-    """
-    ih, iw = image_bgr.shape[:2]
-    result = image_bgr.copy()
-
-    erased_count  = 0
-    skipped_count = 0
-
-    for bid_str, val in bubbles_data.items():
-        bid = int(bid_str)
-        quads = val.get("quads", [])
-
-        if bid in skip_ids or bid not in translations:
-            skipped_count += 1
-            continue
-
-        for quad in quads:
-            pts = np.array(quad, dtype=np.int32)
-            cv2.fillPoly(result, [pts], (255, 255, 255))
-
-            xs = [p[0] for p in quad]; ys = [p[1] for p in quad]
-            x1 = max(0,      min(xs) - pad)
-            y1 = max(0,      min(ys) - pad)
-            x2 = min(iw - 1, max(xs) + pad)
-            y2 = min(ih - 1, max(ys) + pad)
-            cv2.rectangle(result, (x1, y1), (x2, y2), (255, 255, 255), -1)
-
-        erased_count += 1
-
-    print(f"   Erased : {erased_count} bubbles")
-    print(f"   Ignored: {skipped_count} bubbles (no translation or in skip list)")
-    return result
+        return json.load(f)

 # ============================================================
 # DYNAMIC TEXT FITTING
 # ============================================================
 def get_original_font_size(bubble_data: dict, fallback_size: int = DEFAULT_FONT_SIZE) -> int:
-    """Calculates the original font size based on the OCR bounding boxes."""
-    line_bboxes = bubble_data.get("line_bboxes", [])
-    if not line_bboxes:
+    box = bubble_data.get("box")
+    lines = bubble_data.get("lines", [])
+    
+    if not box or not lines:
        return fallback_size
    
-    heights = [box["h"] for box in line_bboxes]
-    median_h = int(np.median(heights))
+    line_count = len(lines)
+    estimated_line_height = box["h"] / max(1, line_count)
+    estimated_size = int(estimated_line_height * 0.85)
    
-    estimated_size = int(median_h * 0.85)
    return max(MIN_FONT_SIZE, min(estimated_size, 60))

 def fit_text_dynamically(
@@ -171,16 +109,11 @@ def fit_text_dynamically(
    max_h: int, 
    target_font_size: int
 ) -> Tuple[List[str], Any, int, int]:
-    """
-    Wraps text and scales down font size if it exceeds the bubble dimensions.
-    Returns: (wrapped_lines, font_object, line_spacing, final_font_size)
-    """
    font_size = target_font_size
    
    if not font_path:
        font = ImageFont.load_default()
-        char_w = 6
-        chars_per_line = max(1, int(max_w / char_w))
+        chars_per_line = max(1, int(max_w / 6))
        wrapped_lines = textwrap.wrap(text, width=chars_per_line)
        return wrapped_lines, font, 4, 10
    
@@ -196,7 +129,6 @@ def fit_text_dynamically(
        
        wrapped_lines = textwrap.wrap(text, width=chars_per_line)
        
-        # Use uniform font metrics for height to protect accents like È
        line_spacing = max(2, int(font_size * 0.15))
        if hasattr(font, 'getmetrics'):
            ascent, descent = font.getmetrics()
@@ -235,10 +167,6 @@ def render_text(
    font_path: str,
    skip_ids: Set[int]
 ):
-    """
-    Draws the translated text centered in the line_union_bbox of each bubble.
-    Adds a dynamic white stroke (outline) to cover any residual original characters.
-    """
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(image_rgb)
    draw = ImageDraw.Draw(pil_img)
@@ -252,14 +180,11 @@ def render_text(
            continue

        text = translations[bid]
+        box = val.get("box")
+        if not box:
+            continue
            
-        union_box = val.get("line_union_bbox")
-        if not union_box:
-            union_box = val.get("text_bbox")
-            if not union_box:
-                continue
-            
-        bx, by, bw, bh = union_box["x"], union_box["y"], union_box["w"], union_box["h"]
+        bx, by, bw, bh = box["x"], box["y"], box["w"], box["h"]
        
        pad_x = int(bw * 0.1)
        pad_y = int(bh * 0.1)
@@ -271,7 +196,6 @@ def render_text(
        target_size = get_original_font_size(val)
        wrapped_lines, font, line_spacing, final_size = fit_text_dynamically(text, font_path, bw, bh, target_size)
        
-        # Use uniform typographic line height for rendering to protect accents
        if hasattr(font, 'getmetrics'):
            ascent, descent = font.getmetrics()
            line_h = ascent + descent
@@ -279,11 +203,17 @@ def render_text(
            line_h = final_size
                
        total_text_height = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1))
-        
        current_y = by + (bh - total_text_height) // 2
        
-        # Dynamic outline thickness based on the final scaled font size
-        outline_thickness = max(2, int(final_size * 0.10))
+        # --- SMART OUTLINE LOGIC ---
+        bg_type = val.get("background_type", "white")
+        
+        # Only use a white outline if the background is complex (inpainted artwork).
+        # If it's a white bubble, or if we are using the tiny default font, disable the outline.
+        if bg_type == "complex" and font_path:
+            outline_thickness = max(1, int(final_size * 0.05))
+        else:
+            outline_thickness = 0

        for i, line in enumerate(wrapped_lines):
            if hasattr(font, 'getbbox'):
@@ -294,7 +224,6 @@ def render_text(
            
            current_x = bx + (bw - lw) // 2
            
-            # Draw text with white stroke for artifact coverage
            draw.text(
                (current_x, current_y), 
                line, 
@@ -304,53 +233,59 @@ def render_text(
                stroke_fill=(255, 255, 255)
            )
            
-            # Advance Y by the uniform line height + spacing
            current_y += line_h + line_spacing

        rendered_count += 1

-    print(f"   Rendered: {rendered_count} bubbles (with uniform line spacing & outlines)")
+    print(f"   Rendered: {rendered_count} bubbles")
    return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)

 # ============================================================
 # MAIN
 # ============================================================
 def main():
-    print(f"Loading image: {IMAGE_PATH}")
-    image_bgr = cv2.imread(IMAGE_PATH)
-    if image_bgr is None:
-        print(f"❌ Error: Could not load {IMAGE_PATH}")
+    parser = argparse.ArgumentParser(description="Render translated text onto cleaned manga pages.")
+    parser.add_argument("-i", "--image", required=True, help="Path to the CLEANED manga image")
+    parser.add_argument("-j", "--json", required=True, help="Path to bubbles.json")
+    parser.add_argument("-t", "--txt", required=True, help="Path to output.txt")
+    parser.add_argument("-o", "--output", help="Path to save the final translated image")
+    
+    args = parser.parse_args()
+
+    if not os.path.exists(args.image):
+        print(f"❌ Error: Image file not found at {args.image}")
        return
        
-    print(f"Loading translations: {TRANSLATIONS_PATH}")
-    translations = parse_translations(TRANSLATIONS_PATH)
+    print(f"📂 Loading cleaned image: {args.image}")
+    image_bgr = cv2.imread(args.image)
    
-    print(f"Loading bubble data: {BUBBLES_PATH}")
-    bubbles_data = parse_bubbles(BUBBLES_PATH)
+    print(f"📂 Loading translations: {args.txt}")
+    translations = parse_translations(args.txt)
    
-    print("Resolving font...")
+    print(f"📂 Loading bubble data: {args.json}")
+    bubbles_data = parse_bubbles(args.json)
+
+    print("🔍 Resolving font...")
    font_path = resolve_font_path()

-    print("\n--- Step 1: Erasing original text ---")
-    erased_bgr = erase_quads(
-        image_bgr=image_bgr,
-        bubbles_data=bubbles_data,
-        translations=translations,
-        skip_ids=SKIP_BUBBLE_IDS,
-        pad=QUAD_PAD
-    )
-
-    print("\n--- Step 2: Rendering translated text ---")
+    print("\n--- Rendering translated text ---")
    final_bgr = render_text(
-        image_bgr=erased_bgr,
+        image_bgr=image_bgr,
        bubbles_data=bubbles_data,
        translations=translations,
        font_path=font_path,
        skip_ids=SKIP_BUBBLE_IDS
    )

-    print(f"\nSaving final image to: {OUTPUT_PATH}")
-    cv2.imwrite(OUTPUT_PATH, final_bgr)
+    if args.output:
+        out_path = args.output
+    else:
+        base_name = args.image.replace("_cleaned", "")
+        base_name, ext = os.path.splitext(base_name)
+        out_path = f"{base_name}_translated{ext}"
+
+    print(f"\n💾 Saving final image to: {out_path}")
+    cv2.imwrite(out_path, final_bgr)
    print("✅ Done!")

 if __name__ == "__main__":