Added all

2026-04-23 19:28:39 +02:00
parent fd0339d8ca
commit 832d699917
3 changed files with 312 additions and 137 deletions
--- a/batch-clean.sh
+++ b/batch-clean.sh
--- a/batch-renderer.sh
+++ b/batch-renderer.sh
@@ -0,0 +1,240 @@
 #!/usr/bin/env bash
 # ============================================================
 # batch-renderer.sh
 # Batch manga text rendering using cleaned images and output.txt
 #
 # Usage:
 #   ./batch-renderer.sh <folder>
 #   ./batch-renderer.sh <folder> --start 3 --end 7
 #
 # Output per page lands in:
 #   <folder>/translated/<page_stem>/
 #     └── <page_stem>_translated.png
 # ============================================================
 # -u: expanding an unset variable is an error.
 # pipefail: a pipeline fails when any of its stages fails.
 # Note that -e is not enabled, so a failing command does not abort the script
 # on its own; failures are checked explicitly where they matter.
 set -uo pipefail
 # ─────────────────────────────────────────────────────────────
 # CONFIGURATION
 # ─────────────────────────────────────────────────────────────
 # Defaults; each one can be overridden from the command line
 # (--start, --end, --python).
 START_PAGE=1
 END_PAGE=999999
 PYTHON_BIN="python"
 # Absolute directory of this script, used to locate the renderer next to it.
 # `--` keeps a path that starts with a dash from being read as an option, and
 # cd's stdout is discarded because cd prints the target directory when it
 # resolves it through CDPATH, which would otherwise end up in the capture.
 SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null && pwd)"
 RENDERER="${SCRIPT_DIR}/manga-renderer.py"
 # ─────────────────────────────────────────────────────────────
 # COLOURS
 # ─────────────────────────────────────────────────────────────
 # Stored as literal backslash sequences (single-quoted); they only turn into
 # real escape characters when printed with `echo -e` or `printf %b`.
 RED='\033[0;31m'
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 CYAN='\033[0;36m'
 BOLD='\033[1m'
 RESET='\033[0m'
 # ─────────────────────────────────────────────────────────────
 # HELPERS
 # ─────────────────────────────────────────────────────────────
 # Print the command-line help on stdout.
 # Reads the BOLD and RESET colour variables and $0 (the script name).
 usage() {
    # Headings go through %b so the escape sequences held in BOLD/RESET are
    # expanded; every other line is emitted verbatim with %s.
    printf '\n%b\n' "${BOLD}Usage:${RESET}"
    printf '%s\n' "  $0 <folder> [options]" ""
    printf '%b\n' "${BOLD}Options:${RESET}"
    printf '%s\n' \
        "  --start         First page number     (default: 1)" \
        "  --end           Last  page number     (default: all)" \
        "  --python        Python binary         (default: python)" \
        "  --help,    -h   Show this help" \
        ""
    printf '%b\n' "${BOLD}Examples:${RESET}"
    printf '%s\n' \
        "  $0 pages-for-tests" \
        "  $0 pages-for-tests --start 3 --end 7" \
        ""
 }
 # Print one coloured, icon-prefixed status line on stdout.
 #   $1   colour sequence in literal backslash form (e.g. '\033[0;36m')
 #   $2   icon shown before the message
 #   $3…  message words, joined with single spaces
 # Only the colour codes go through %b. The message itself is printed with %s
 # so that backslashes in paths or user input (e.g. "C:\new\table") are shown
 # verbatim instead of being expanded into newlines/tabs as `echo -e` would do.
 _log_line() {
    local colour="$1" icon="$2"
    shift 2
    printf '%b%s  %s%b\n' "$colour" "$icon" "$*" "${RESET}"
 }
 # Level-specific wrappers: informational, success, warning and error lines.
 log_info()    { _log_line "${CYAN}"   "ℹ️" "$@"; }
 log_ok()      { _log_line "${GREEN}"  "✅" "$@"; }
 log_warn()    { _log_line "${YELLOW}" "⚠️" "$@"; }
 log_error()   { _log_line "${RED}"    "❌" "$@"; }
 # Print a banner on stdout: a blank line, a rule, the title, and a rule.
 #   $@  title words, joined with single spaces
 # Only the colour codes are expanded (via %b); the title is printed with %s so
 # any backslashes it contains are shown verbatim rather than interpreted.
 log_section() {
    local rule="══════════════════════════════════════════"
    printf '\n%b%s%b\n' "${BOLD}${CYAN}" "$rule" "${RESET}"
    printf '%b  🔤  %s%b\n' "${BOLD}${CYAN}" "$*" "${RESET}"
    printf '%b%s%b\n' "${BOLD}${CYAN}" "$rule" "${RESET}"
 }
 # ─────────────────────────────────────────────────────────────
 # ARGUMENT PARSING
 # ─────────────────────────────────────────────────────────────
 # A folder is mandatory; with no arguments there is nothing to do.
 if [[ $# -eq 0 ]]; then
    log_error "No folder specified."
    usage
    exit 1
 fi
 # Honour --help/-h in first position as well; otherwise it would be taken as
 # the folder name and rejected later with "Folder not found".
 case "$1" in
    --help|-h) usage; exit 0 ;;
 esac
 FOLDER="$1"
 shift
 while [[ $# -gt 0 ]]; do
    case "$1" in
        --start|--end|--python)
            # Check for the value explicitly: under `set -u` a bare "$2"
            # would abort with a cryptic "unbound variable" message.
            if [[ $# -lt 2 ]]; then
                log_error "Option $1 requires a value."
                usage
                exit 1
            fi
            case "$1" in
                --start)  START_PAGE="$2" ;;
                --end)    END_PAGE="$2" ;;
                --python) PYTHON_BIN="$2" ;;
            esac
            shift 2
            ;;
        --help|-h)    usage; exit 0 ;;
        *)
            log_error "Unknown option: $1"
            usage
            exit 1
            ;;
    esac
 done
 # The page bounds are later used in arithmetic comparisons, where bash
 # evaluates arbitrary strings as expressions; accept decimal digits only.
 if [[ ! "$START_PAGE" =~ ^[0-9]+$ || ! "$END_PAGE" =~ ^[0-9]+$ ]]; then
    log_error "--start and --end must be whole numbers (got: start='${START_PAGE}', end='${END_PAGE}')"
    exit 1
 fi
 # Force base 10 so zero-padded values such as "08" are not parsed as octal.
 START_PAGE=$(( 10#$START_PAGE ))
 END_PAGE=$(( 10#$END_PAGE ))
 # Pages are numbered from 1; clamp so the per-page labels stay aligned with
 # the images actually selected.
 if [[ $START_PAGE -lt 1 ]]; then
    START_PAGE=1
 fi
 # ─────────────────────────────────────────────────────────────
 # VALIDATION
 # ─────────────────────────────────────────────────────────────
 # Each precondition is a guard: report the problem and stop with status 1.
 # The folder given on the command line must be an existing directory.
 [[ -d "$FOLDER" ]] || { log_error "Folder not found: $FOLDER"; exit 1; }
 # The renderer script must exist at the path held in RENDERER.
 [[ -f "$RENDERER" ]] || { log_error "manga-renderer.py not found at: $RENDERER"; exit 1; }
 # The interpreter must be resolvable as a command.
 command -v "$PYTHON_BIN" >/dev/null 2>&1 || {
    log_error "Python binary not found: $PYTHON_BIN"
    log_error "Try --python python3"
    exit 1
 }
 # ─────────────────────────────────────────────────────────────
 # DISCOVER IMAGES
 # ─────────────────────────────────────────────────────────────
 # Collect the image files that sit directly inside FOLDER (no recursion).
 # Names travel NUL-delimited so any filename survives intact, and they are
 # sorted so that the array index defines the page order.
 ALL_IMAGES=()
 while IFS= read -r -d '' img; do
    ALL_IMAGES[${#ALL_IMAGES[@]}]="$img"
 done < <(
    find "$FOLDER" -maxdepth 1 -type f \
        '(' -iname '*.jpg' -o -iname '*.jpeg' -o -iname '*.png' -o -iname '*.webp' ')' \
        -print0 | sort -z
 )
 TOTAL="${#ALL_IMAGES[@]}"
 # Nothing to render without at least one image.
 if (( TOTAL == 0 )); then
    log_error "No image files found in: $FOLDER"
    exit 1
 fi
 # ─────────────────────────────────────────────────────────────
 # SLICE TO REQUESTED PAGE RANGE
 # ─────────────────────────────────────────────────────────────
 # Keep only the images whose 1-based position in ALL_IMAGES lies within
 # [START_PAGE, END_PAGE].
 PAGES=()
 for (( i = 0; i < TOTAL; i++ )); do
    PAGE_NUM=$(( i + 1 ))
    if (( PAGE_NUM >= START_PAGE && PAGE_NUM <= END_PAGE )); then
        PAGES+=("${ALL_IMAGES[i]}")
    fi
 done
 # An empty selection means the requested range misses every page.
 if (( ${#PAGES[@]} == 0 )); then
    log_error "No pages in range [${START_PAGE}, ${END_PAGE}] (total: ${TOTAL})"
    exit 1
 fi
 # ─────────────────────────────────────────────────────────────
 # SUMMARY HEADER
 # ─────────────────────────────────────────────────────────────
 # Banner followed by the resolved folder, the selection size and the range.
 log_section "BATCH MANGA RENDERER"
 log_info "📂  Folder   : $(realpath "$FOLDER")"
 log_info "📄  Pages    : ${#PAGES[@]} of ${TOTAL} total"
 log_info "🔢  Range    : ${START_PAGE} → ${END_PAGE}"
 printf '\n'
 # ─────────────────────────────────────────────────────────────
 # PROCESS EACH PAGE
 # ─────────────────────────────────────────────────────────────
 # Counters for the final summary; FAIL_LIST keeps "<stem> (<reason>)" entries.
 PASS=0
 FAIL=0
 FAIL_LIST=()
 for i in "${!PAGES[@]}"; do
    IMAGE="${PAGES[$i]}"
    # Label shown to the user; equals the image's position in the folder
    # listing as long as START_PAGE is at least 1.
    PAGE_NUM=$(( START_PAGE + i ))
    # File name without directory and without extension.
    STEM="$(basename -- "${IMAGE%.*}")"
    # Per-page directory that holds the inputs and receives the output.
    WORKDIR="${FOLDER}/translated/${STEM}"
    echo ""
    echo -e "${BOLD}──────────────────────────────────────────${RESET}"
    # The stem comes from a file name, so it is printed with %s: only the
    # colour codes are expanded, backslashes in the name stay verbatim.
    printf '%b  🖼️  [%s/%s]  %s%b\n' "${BOLD}" "${PAGE_NUM}" "${TOTAL}" "${STEM}" "${RESET}"
    echo -e "${BOLD}──────────────────────────────────────────${RESET}"
    INPUT_CLEANED="${WORKDIR}/${STEM}_cleaned.png"
    INPUT_JSON="${WORKDIR}/bubbles.json"
    INPUT_TXT="${WORKDIR}/output.txt"
    OUTPUT_RENDERED="${WORKDIR}/${STEM}_translated.png"
    # Check for required files; report every missing one before skipping.
    MISSING_FILES=0
    for REQ_FILE in "$INPUT_CLEANED" "$INPUT_JSON" "$INPUT_TXT"; do
        if [[ ! -f "$REQ_FILE" ]]; then
            log_warn "Missing required file: $(basename -- "$REQ_FILE")"
            MISSING_FILES=1
        fi
    done
    if [[ $MISSING_FILES -eq 1 ]]; then
        log_error "Skipping ${STEM} due to missing files. Did you run batch-clean.sh?"
        FAIL=$(( FAIL + 1 ))
        FAIL_LIST+=("${STEM} (Missing Files)")
        continue
    fi
    log_info "🗂️  Cleaned Image : $(basename -- "$INPUT_CLEANED")"
    log_info "🔤  Rendering translated text..."
    # Drop any image left behind by an earlier run, so that the existence
    # check below proves the file was produced by THIS run. Without this, a
    # renderer that exits 0 without writing would be reported as a success.
    rm -f -- "$OUTPUT_RENDERED"
    # ── Run the renderer ──────────────────────────────────────
    if "$PYTHON_BIN" "$RENDERER" \
            -i "$INPUT_CLEANED" \
            -j "$INPUT_JSON" \
            -t "$INPUT_TXT" \
            -o "$OUTPUT_RENDERED"; then
        # A zero exit status alone is not trusted; the output must exist.
        if [[ -f "$OUTPUT_RENDERED" ]]; then
            log_ok "Translated image saved → ${STEM}_translated.png"
            PASS=$(( PASS + 1 ))
        else
            log_error "Script ran but output image is missing."
            FAIL=$(( FAIL + 1 ))
            FAIL_LIST+=("${STEM} (Missing Output)")
        fi
    else
        log_error "Page ${PAGE_NUM} FAILED — check output above."
        FAIL=$(( FAIL + 1 ))
        FAIL_LIST+=("${STEM} (Script Error)")
    fi
 done
 # ─────────────────────────────────────────────────────────────
 # FINAL SUMMARY
 # ─────────────────────────────────────────────────────────────
 log_section "BATCH RENDERING COMPLETE"
 echo -e "  ✅  ${GREEN}Passed : ${PASS}${RESET}"
 echo -e "  ❌  ${RED}Failed : ${FAIL}${RESET}"
 if [[ ${#FAIL_LIST[@]} -gt 0 ]]; then
    echo ""
    log_warn "Failed pages:"
    for NAME in "${FAIL_LIST[@]}"; do
        # Entries contain file stems; print them with %s so backslashes in a
        # name are shown verbatim. Only the colour codes go through %b.
        printf '    ❌  %b%s%b\n' "${RED}" "${NAME}" "${RESET}"
    done
 fi
 echo ""
 # realpath can fail when translated/ was never created (e.g. every page was
 # skipped); fall back to the unresolved path instead of printing nothing.
 OUTPUT_DIR="$(realpath "${FOLDER}/translated" 2>/dev/null)" || OUTPUT_DIR="${FOLDER}/translated"
 log_info "📦  Output folder: ${OUTPUT_DIR}"
 echo ""
 # Exit status: 0 when every page rendered, 1 when at least one failed.
 if [[ $FAIL -eq 0 ]]; then
    exit 0
 fi
 exit 1
--- a/manga-renderer.py
+++ b/manga-renderer.py
@@ -3,49 +3,37 @@
 """
 manga-renderer.py
-Inputs:  001.jpg  +  bubbles.json  +  output_001.txt
+Inputs:  16_cleaned.png + bubbles.json + output.txt
-Output:  translated_page_001.png
+Output:  16_translated.png
 Strategy:
  1. For every bubble, white-fill all its OCR quads (erases original text cleanly).
  2. Detect the original font size from the OCR bounding boxes.
  3. Dynamically wrap and scale down the translated text if it exceeds the bubble dimensions.
  4. Render the translated text centered inside the bubble bounding box.
  5. Uses uniform line heights (ascent + descent) to prevent Catalan accent collisions (È, À).
  6. Adds a dynamic white stroke to the text to cover any residual original characters.
 """
 import json
 import textwrap
 import cv2
 import numpy as np
 import os
 import argparse
 from PIL import Image, ImageDraw, ImageFont
 from typing import Dict, List, Tuple, Optional, Set, Any
 # ============================================================
-# CONFIG  — edit these paths to match your setup
+# CONFIG
 # ============================================================
-IMAGE_PATH        = "004.png"
+# Added System Fallbacks (macOS, Windows, Linux) so it never fails
 BUBBLES_PATH      = "bubbles_004.json"
 TRANSLATIONS_PATH = "output_004.txt"
 OUTPUT_PATH       = "translated_page_004.png"
 # Font candidates — Prioritizes Laffayette for Catalan, with safe fallbacks
 FONT_CANDIDATES = [
    "fonts/animeace2_reg.ttf",
    "fonts/ComicNeue-Bold.ttf",
    "/Library/Fonts/Arial.ttf",                 # macOS
    "/System/Library/Fonts/Helvetica.ttc",      # macOS
    "C:\\Windows\\Fonts\\arial.ttf",            # Windows
    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf" # Linux
 ]
 DEFAULT_FONT_SIZE = 18
 MIN_FONT_SIZE     = 8
 QUAD_PAD          = 4    # extra pixels added around each quad before white-fill
-# ============================================================
+# Add any bubble IDs you do NOT want rendered here.
-# SKIP LIST
+SKIP_BUBBLE_IDS: Set[int] = set()
 # ============================================================
 SKIP_BUBBLE_IDS: Set[int] = {
    # Add any bubble IDs you do NOT want rendered here.
 }
 # ============================================================
 # FONT LOADER
@@ -65,20 +53,17 @@ def load_font(path: str, size: int) -> Optional[ImageFont.FreeTypeFont]:
 def resolve_font_path() -> str:
    """Return the path for the first working candidate."""
    for candidate in FONT_CANDIDATES:
-        if load_font(candidate, DEFAULT_FONT_SIZE) is not None:
+        if os.path.exists(candidate) and load_font(candidate, DEFAULT_FONT_SIZE) is not None:
-            print(f"   ✅ Font: {candidate}")
+            print(f"   ✅ Font loaded: {candidate}")
            return candidate
-    print("   ⚠️  No TrueType font found — using Pillow bitmap fallback")
+    print("   ⚠️  No TrueType font found — using Pillow bitmap fallback (Text may look small)")
    return ""
 # ============================================================
 # PARSERS
 # ============================================================
 def parse_translations(filepath: str) -> Dict[int, str]:
-    """
+    """Reads output.txt and returns {bubble_id: translated_text}."""
    Reads output.txt and returns {bubble_id: translated_text}.
    Lines look like:  #2|1|vision-base|ORIGINAL|TRANSLATED|FLAGS
    """
    translations = {}
    with open(filepath, "r", encoding="utf-8") as f:
        for line in f:
@@ -86,11 +71,11 @@ def parse_translations(filepath: str) -> Dict[int, str]:
            if not line.startswith("#"):
                continue
            parts = line.split("|")
-            if len(parts) < 5:
+            if len(parts) < 9:
                continue
            try:
                bid        = int(parts[0].lstrip("#"))
-                translated = parts[4].strip()
+                translated = parts[8].strip() # Index 8 is TRANSLATED
                if translated and translated != "-":
                    translations[bid] = translated
            except ValueError:
@@ -98,70 +83,23 @@ def parse_translations(filepath: str) -> Dict[int, str]:
    return translations
 def parse_bubbles(filepath: str):
    """Returns the full JSON data."""
    with open(filepath, "r", encoding="utf-8") as f:
-        data = json.load(f)
+        return json.load(f)
    return data
 # ============================================================
 # ERASE  — white-fill every OCR quad (with small padding)
 # ============================================================
 def erase_quads(
    image_bgr,
    bubbles_data: Dict[str, dict],
    translations: Dict[int, str],
    skip_ids: Set[int],
    pad: int = QUAD_PAD
 ):
    """
    White-fills OCR quads ONLY for bubbles that:
      - have a translation in output.txt  AND
      - are NOT in SKIP_BUBBLE_IDS
    """
    ih, iw = image_bgr.shape[:2]
    result = image_bgr.copy()
    erased_count  = 0
    skipped_count = 0
    for bid_str, val in bubbles_data.items():
        bid = int(bid_str)
        quads = val.get("quads", [])
        if bid in skip_ids or bid not in translations:
            skipped_count += 1
            continue
        for quad in quads:
            pts = np.array(quad, dtype=np.int32)
            cv2.fillPoly(result, [pts], (255, 255, 255))
            xs = [p[0] for p in quad]; ys = [p[1] for p in quad]
            x1 = max(0,      min(xs) - pad)
            y1 = max(0,      min(ys) - pad)
            x2 = min(iw - 1, max(xs) + pad)
            y2 = min(ih - 1, max(ys) + pad)
            cv2.rectangle(result, (x1, y1), (x2, y2), (255, 255, 255), -1)
        erased_count += 1
    print(f"   Erased : {erased_count} bubbles")
    print(f"   Ignored: {skipped_count} bubbles (no translation or in skip list)")
    return result
 # ============================================================
 # DYNAMIC TEXT FITTING
 # ============================================================
 def get_original_font_size(bubble_data: dict, fallback_size: int = DEFAULT_FONT_SIZE) -> int:
-    """Calculates the original font size based on the OCR bounding boxes."""
+    box = bubble_data.get("box")
-    line_bboxes = bubble_data.get("line_bboxes", [])
+    lines = bubble_data.get("lines", [])
-    if not line_bboxes:
+    
    if not box or not lines:
        return fallback_size
-    heights = [box["h"] for box in line_bboxes]
+    line_count = len(lines)
-    median_h = int(np.median(heights))
+    estimated_line_height = box["h"] / max(1, line_count)
    estimated_size = int(estimated_line_height * 0.85)
    estimated_size = int(median_h * 0.85)
    return max(MIN_FONT_SIZE, min(estimated_size, 60))
 def fit_text_dynamically(
@@ -171,16 +109,11 @@ def fit_text_dynamically(
    max_h: int, 
    target_font_size: int
 ) -> Tuple[List[str], Any, int, int]:
    """
    Wraps text and scales down font size if it exceeds the bubble dimensions.
    Returns: (wrapped_lines, font_object, line_spacing, final_font_size)
    """
    font_size = target_font_size
    if not font_path:
        font = ImageFont.load_default()
-        char_w = 6
+        chars_per_line = max(1, int(max_w / 6))
        chars_per_line = max(1, int(max_w / char_w))
        wrapped_lines = textwrap.wrap(text, width=chars_per_line)
        return wrapped_lines, font, 4, 10
@@ -196,7 +129,6 @@ def fit_text_dynamically(
        wrapped_lines = textwrap.wrap(text, width=chars_per_line)
        # Use uniform font metrics for height to protect accents like È
        line_spacing = max(2, int(font_size * 0.15))
        if hasattr(font, 'getmetrics'):
            ascent, descent = font.getmetrics()
@@ -235,10 +167,6 @@ def render_text(
    font_path: str,
    skip_ids: Set[int]
 ):
    """
    Draws the translated text centered in the line_union_bbox of each bubble.
    Adds a dynamic white stroke (outline) to cover any residual original characters.
    """
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(image_rgb)
    draw = ImageDraw.Draw(pil_img)
@@ -252,14 +180,11 @@ def render_text(
            continue
        text = translations[bid]
        box = val.get("box")
        if not box:
            continue
-        union_box = val.get("line_union_bbox")
+        bx, by, bw, bh = box["x"], box["y"], box["w"], box["h"]
        if not union_box:
            union_box = val.get("text_bbox")
            if not union_box:
                continue
        bx, by, bw, bh = union_box["x"], union_box["y"], union_box["w"], union_box["h"]
        pad_x = int(bw * 0.1)
        pad_y = int(bh * 0.1)
@@ -271,7 +196,6 @@ def render_text(
        target_size = get_original_font_size(val)
        wrapped_lines, font, line_spacing, final_size = fit_text_dynamically(text, font_path, bw, bh, target_size)
        # Use uniform typographic line height for rendering to protect accents
        if hasattr(font, 'getmetrics'):
            ascent, descent = font.getmetrics()
            line_h = ascent + descent
@@ -279,11 +203,17 @@ def render_text(
            line_h = final_size
        total_text_height = (line_h * len(wrapped_lines)) + (line_spacing * max(0, len(wrapped_lines) - 1))
        current_y = by + (bh - total_text_height) // 2
-        # Dynamic outline thickness based on the final scaled font size
+        # --- SMART OUTLINE LOGIC ---
-        outline_thickness = max(2, int(final_size * 0.10))
+        bg_type = val.get("background_type", "white")
        # Only use a white outline if the background is complex (inpainted artwork).
        # If it's a white bubble, or if we are using the tiny default font, disable the outline.
        if bg_type == "complex" and font_path:
            outline_thickness = max(1, int(final_size * 0.05))
        else:
            outline_thickness = 0
        for i, line in enumerate(wrapped_lines):
            if hasattr(font, 'getbbox'):
@@ -294,7 +224,6 @@ def render_text(
            current_x = bx + (bw - lw) // 2
            # Draw text with white stroke for artifact coverage
            draw.text(
                (current_x, current_y), 
                line, 
@@ -304,53 +233,59 @@ def render_text(
                stroke_fill=(255, 255, 255)
            )
            # Advance Y by the uniform line height + spacing
            current_y += line_h + line_spacing
        rendered_count += 1
-    print(f"   Rendered: {rendered_count} bubbles (with uniform line spacing & outlines)")
+    print(f"   Rendered: {rendered_count} bubbles")
    return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
 # ============================================================
 # MAIN
 # ============================================================
 def main():
-    print(f"Loading image: {IMAGE_PATH}")
+    parser = argparse.ArgumentParser(description="Render translated text onto cleaned manga pages.")
-    image_bgr = cv2.imread(IMAGE_PATH)
+    parser.add_argument("-i", "--image", required=True, help="Path to the CLEANED manga image")
-    if image_bgr is None:
+    parser.add_argument("-j", "--json", required=True, help="Path to bubbles.json")
-        print(f"❌ Error: Could not load {IMAGE_PATH}")
+    parser.add_argument("-t", "--txt", required=True, help="Path to output.txt")
    parser.add_argument("-o", "--output", help="Path to save the final translated image")
    args = parser.parse_args()
    if not os.path.exists(args.image):
        print(f"❌ Error: Image file not found at {args.image}")
        return
-    print(f"Loading translations: {TRANSLATIONS_PATH}")
+    print(f"📂 Loading cleaned image: {args.image}")
-    translations = parse_translations(TRANSLATIONS_PATH)
+    image_bgr = cv2.imread(args.image)
-    print(f"Loading bubble data: {BUBBLES_PATH}")
+    print(f"📂 Loading translations: {args.txt}")
-    bubbles_data = parse_bubbles(BUBBLES_PATH)
+    translations = parse_translations(args.txt)
-    print("Resolving font...")
+    print(f"📂 Loading bubble data: {args.json}")
    bubbles_data = parse_bubbles(args.json)
    print("🔍 Resolving font...")
    font_path = resolve_font_path()
-    print("\n--- Step 1: Erasing original text ---")
+    print("\n--- Rendering translated text ---")
    erased_bgr = erase_quads(
        image_bgr=image_bgr,
        bubbles_data=bubbles_data,
        translations=translations,
        skip_ids=SKIP_BUBBLE_IDS,
        pad=QUAD_PAD
    )
    print("\n--- Step 2: Rendering translated text ---")
    final_bgr = render_text(
-        image_bgr=erased_bgr,
+        image_bgr=image_bgr,
        bubbles_data=bubbles_data,
        translations=translations,
        font_path=font_path,
        skip_ids=SKIP_BUBBLE_IDS
    )
-    print(f"\nSaving final image to: {OUTPUT_PATH}")
+    if args.output:
-    cv2.imwrite(OUTPUT_PATH, final_bgr)
+        out_path = args.output
    else:
        base_name = args.image.replace("_cleaned", "")
        base_name, ext = os.path.splitext(base_name)
        out_path = f"{base_name}_translated{ext}"
    print(f"\n💾 Saving final image to: {out_path}")
    cv2.imwrite(out_path, final_bgr)
    print("✅ Done!")
 if __name__ == "__main__":